From 4d97378ea292d6acd005138fc0db31bdb260346d Mon Sep 17 00:00:00 2001
From: Kieran Didi <58345129+kierandidi@users.noreply.github.com>
Date: Wed, 17 Apr 2024 10:24:45 +0100
Subject: [PATCH] exposed fill_value to protein_to_pyg function (#385)

* exposed fill_value to protein_to_pyg function (as the new fill_value_coords argument)

* added to CHANGELOG

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 CHANGELOG.md                  | 3 ++-
 graphein/protein/tensor/io.py | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f867abc7..9929c5bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,8 @@
 * Fix bug where the `deprotonate` argument is not wired up to `graphein.protein.graphs.construct_graphs`. [#375](https://github.com/a-r-j/graphein/pull/375)
 
 #### Misc
-* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas [#382](https://github.com/a-r-j/graphein/pull/382)
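+* Expose the coordinate fill value of `protein_to_pyg` via the new `fill_value_coords` argument (default `1e-5`), which is forwarded as `fill_value` to `protein_df_to_tensor`. [#385](https://github.com/a-r-j/graphein/pull/385)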
+* Updated Foldcomp datasets with improved setup function and updated database choices such as ESMAtlas. [#382](https://github.com/a-r-j/graphein/pull/382)
 * Resolve issue with notebook version and `pluggy` in Dockerfile. [#372](https://github.com/a-r-j/graphein/pull/372)
 * Remove `typing_extension` as dependency since we now primarily support Python >=3.8 and `Literal` is included in `typing` there.
 
diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py
index eeaa93e4..cc074ecd 100644
--- a/graphein/protein/tensor/io.py
+++ b/graphein/protein/tensor/io.py
@@ -108,6 +108,7 @@ def protein_to_pyg(
     atom_types: List[str] = PROTEIN_ATOMS,
     remove_nonstandard: bool = True,
     store_het: bool = False,
+    fill_value_coords: float = 1e-5,
 ) -> Data:
     """
     Parses a protein (from either: a PDB code, PDB file or a UniProt ID
@@ -237,7 +238,9 @@ def protein_to_pyg(
         df["residue_id"] = df.residue_id + ":" + df.insertion
 
     out = Data(
-        coords=protein_df_to_tensor(df, atoms_to_keep=atom_types),
+        coords=protein_df_to_tensor(
+            df, atoms_to_keep=atom_types, fill_value=fill_value_coords
+        ),
         residues=get_sequence(
             df,
             chains=chain_selection,