inputs.ramp: moved mapping- and REST-related code into separate modules

saezlab · Oct 24, 2024 · a6c930a · a6c930a
1 parent 7937d56
commit a6c930a
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 0 deletions.
diff --git a/pypath/inputs/ramp/_mapping.py b/pypath/inputs/ramp/_mapping.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright 2014-2024
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: see the file `README.rst`
+#  Contact: Dénes Türei ([email protected])
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      https://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: https://pypath.omnipathdb.org/
+#
+
+from __future__ import annotations
+
+
+def ramp_mapping(
+        id_type_a: str,
+        id_type_b: str,
+        return_df: bool = False,
+        curies: bool = False,
+    ) -> dict[str, set[str]] | pd.DataFrame:
+    """
+    Retrieve the mapping between two identifiers.
+
+    Args:
+        id_type_a:
+            The identifier type of the first identifier.
+        id_type_b:
+            The identifier type of the second identifier.
+        return_df:
+            Return a pandas DataFrame instead of a dictionary.
+        curies:
+            Do not remove CURIEs from the identifiers.
+
+    Returns:
+        A dictionary with the mapping between the two identifiers.
+    """
+
+    query = (
+        'SELECT DISTINCT a.sourceId as id_type_a, b.sourceId as id_type_b '
+        'FROM '
+        '   (SELECT sourceId, rampId '
+        '    FROM source '
+        f'   WHERE geneOrCompound = "compound" AND IDtype = "{id_type_a}") a '
+        'JOIN '
+        '   (SELECT sourceId, rampId '
+        '    FROM source '
+        f'   WHERE geneOrCompound = "compound" AND IDtype = "{id_type_b}") b '
+        'ON a.rampId = b.rampId;'
+    )
+
+    con = ramp_raw(tables = 'source', sqlite = True)
+    df = pd.read_sql_query(query, con)
+
+    if not curies:
+
+        df[df.columns] = df[df.columns].apply(
+            lambda y: [x.split(':', maxsplit = 1)[-1] for x in y],
+        )
+
+    return (
+        df
+            if return_df else
+        df.groupby('id_type_a')['id_type_b'].apply(set).to_dict()
+    )
diff --git a/pypath/inputs/ramp/_rest.py b/pypath/inputs/ramp/_rest.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+#
+#  This file is part of the `pypath` python module
+#
+#  Copyright 2014-2024
+#  EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
+#
+#  Authors: see the file `README.rst`
+#  Contact: Dénes Türei ([email protected])
+#
+#  Distributed under the GPLv3 License.
+#  See accompanying file LICENSE.txt or copy at
+#      https://www.gnu.org/licenses/gpl-3.0.html
+#
+#  Website: https://pypath.omnipathdb.org/
+#
+
+from __future__ import annotations
+
+import json
+
+import pypath.resources.urls as urls
+import pypath.share.curl as curl
+
+def ramp_id_types(
+        entity_type: Literal['gene', 'compound'] | None = None,
+    ) -> set[str]:
+    """
+    List the identifier types of the RaMP database.
+    """
+
+    entity_types = {
+        'compound': 'Metabolites',
+        'gene': 'Genes/Proteins',
+    }
+
+    url = urls.urls['ramp']['api'] % 'id-types'
+    c = curl.Curl(url, silent = True, large = False)
+
+    return {
+        id_type.strip()
+        for i in json.loads(c.result)['data']
+        if not entity_type or i['analyteType'] == entity_types[entity_type]
+        for id_type in i['idTypes'].split(',')
+    }