From 47d9dde5e5dc2cd085269520d2ccf34ced1d6b68 Mon Sep 17 00:00:00 2001 From: Arian Jamasb Date: Wed, 10 May 2023 23:02:39 +0100 Subject: [PATCH] Accounts for selenocysteine in sidechain torsion angle computation (#316) * add PSW to nonstandard residues * improve insertion and non-standard residue handling * refactor chain selection * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused verbosity arg * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix chain selection in tests * fix chain selection in tutorial notebook * fix notebook chain selection * fix chain selection typehint * Update changelog * Add NLW to non-standard residues * add .ent support * add entry for construction from dataframe * add missing stage arg * improve obsolete mapping retrieving to include entries with no replacement * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update changelog * add transforms to foldcomp datasets * fix jaxtyping syntax * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update changelog * fix double application of transforms * improve foldcomp data loading performance * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove unused imports * linting * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update changelog * add B factors to FC parsing output * bugfix to alpha & kappa angle embedding * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update changelog * handle selenocysteine in sidechain torsion angle computation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- graphein/protein/tensor/angles.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/graphein/protein/tensor/angles.py b/graphein/protein/tensor/angles.py index 3d03f753..3fd47282 100644 --- a/graphein/protein/tensor/angles.py +++ b/graphein/protein/tensor/angles.py @@ -70,10 +70,18 @@ def _extract_torsion_coords( res_atoms = [] idxs = [] + # Whether or not the protein contains selenocysteine + selenium = coords.shape[1] == 38 + # Iterate over residues and grab indices of the atoms for each Chi angle for i, res in enumerate(res_types): res_coords = [] - for angle_coord_set in CHI_ANGLES_ATOMS[res]: + + angle_groups = CHI_ANGLES_ATOMS[res] + if not selenium and res == "SEC": + angle_groups = [] + + for angle_coord_set in angle_groups: res_coords.append([ATOM_NUMBERING[i] for i in angle_coord_set]) idxs.append(i) res_atoms.append(torch.tensor(res_coords, device=coords.device)) @@ -115,6 +123,9 @@ def sidechain_torsion( :return: _description_ :rtype: Union[TorsionTensor, Tuple[TorsionTensor, torch.Tensor]] """ + # Whether or not the protein contains selenocysteine + selenium = coords.shape[1] == 38 + idxs, coords = _extract_torsion_coords(coords, res_types) angles = _dihedral_angle( coords[:, 0, :].unsqueeze(1), @@ -139,7 +150,11 @@ def sidechain_torsion( res_types = copy.deepcopy(res_types) res_types.reverse() for res in res_types: - if res in ["ALA", "GLY", "UNK"]: + PAD_RESIDUES = ["ALA", "GLY", "UNK"] + if not selenium: + PAD_RESIDUES.append("SEC") + + if res in PAD_RESIDUES: post_pad_len += 1 else: break