From 4d97378ea292d6acd005138fc0db31bdb260346d Mon Sep 17 00:00:00 2001 From: Kieran Didi <58345129+kierandidi@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:24:45 +0100 Subject: [PATCH] exposed fill_value to protein_to_pyg function (#385) * exposed fill_value to protein_to_pyg function * added to CHANGELOG * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 3 ++- graphein/protein/tensor/io.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f867abc7..9929c5bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. [#375](https://github.com/a-r-j/graphein/pull/375) #### Misc -* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas [#382](https://github.com/a-r-j/graphein/pull/382) +* Expose the coordinate fill value of `protein_to_pyg` through a new `fill_value_coords` argument. [#385](https://github.com/a-r-j/graphein/pull/385) +* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. [#382](https://github.com/a-r-j/graphein/pull/382) * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372) * Remove `typing_extension` as dependency since we now primarily support Python >=3.8 and `Literal` is included in `typing` there. 
diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index eeaa93e4..cc074ecd 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -108,6 +108,7 @@ def protein_to_pyg( atom_types: List[str] = PROTEIN_ATOMS, remove_nonstandard: bool = True, store_het: bool = False, + fill_value_coords: float = 1e-5, ) -> Data: """ Parses a protein (from either: a PDB code, PDB file or a UniProt ID @@ -237,7 +238,9 @@ def protein_to_pyg( df["residue_id"] = df.residue_id + ":" + df.insertion out = Data( - coords=protein_df_to_tensor(df, atoms_to_keep=atom_types), + coords=protein_df_to_tensor( + df, atoms_to_keep=atom_types, fill_value=fill_value_coords + ), residues=get_sequence( df, chains=chain_selection,