diff --git a/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb new file mode 100644 index 0000000..ffe7f25 --- /dev/null +++ b/notebooks/008_application_grk/001_grk_structures_in_klifs.ipynb @@ -0,0 +1,689 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "de774775-ab50-4ac6-8fd1-9edbf6bdb0a2", + "metadata": {}, + "source": [ + "# GRK structures in KLIFS" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "45f44548-22d2-45a1-8541-7200f2918477", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from opencadd.databases.klifs import setup_remote" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "58dfff58-5843-4d23-96f1-20a9ca6b7a4d", + "metadata": {}, + "outputs": [], + "source": [ + "klifs_session = setup_remote()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b0d78c63-0cdd-4bdf-b0cf-1b6042b2ce4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kinase.klifs_idkinase.klifs_namekinase.full_namekinase.gene_namekinase.uniprotspecies.klifs
016RHOKG protein-coupled receptor kinase 1GRK1Q15835Human
111BARK1adrenergic, beta, receptor kinase 1GRK2P25098Human
212BARK2adrenergic, beta, receptor kinase 2GRK3P35626Human
313GPRK4G protein-coupled receptor kinase 4GRK4P32298Human
414GPRK5G protein-coupled receptor kinase 5GRK5P34947Human
515GPRK6G protein-coupled receptor kinase 6GRK6P43250Human
617GPRK7G protein-coupled receptor kinase 7GRK7Q8WTQ7Human
\n", + "
" + ], + "text/plain": [ + " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n", + "0 16 RHOK G protein-coupled receptor kinase 1 \n", + "1 11 BARK1 adrenergic, beta, receptor kinase 1 \n", + "2 12 BARK2 adrenergic, beta, receptor kinase 2 \n", + "3 13 GPRK4 G protein-coupled receptor kinase 4 \n", + "4 14 GPRK5 G protein-coupled receptor kinase 5 \n", + "5 15 GPRK6 G protein-coupled receptor kinase 6 \n", + "6 17 GPRK7 G protein-coupled receptor kinase 7 \n", + "\n", + " kinase.gene_name kinase.uniprot species.klifs \n", + "0 GRK1 Q15835 Human \n", + "1 GRK2 P25098 Human \n", + "2 GRK3 P35626 Human \n", + "3 GRK4 P32298 Human \n", + "4 GRK5 P34947 Human \n", + "5 GRK6 P43250 Human \n", + "6 GRK7 Q8WTQ7 Human " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kinases = klifs_session.kinases.all_kinases(families=\"GRK\", species=\"Human\")\n", + "kinases" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "189c2c0c-baeb-4ac7-8de2-1d4e3c940065", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16 11 12 13 14 15 17\n" + ] + } + ], + "source": [ + "kinase_klifs_ids = kinases[\"kinase.klifs_id\"].to_list()\n", + "print(*kinase_klifs_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0111493c-95ce-4ddc-8caa-b35d4c08ad19", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
kinase.klifs_idkinase.klifs_namekinase.full_namekinase.gene_namekinase.familykinase.groupkinase.subfamilyspecies.klifskinase.uniprotkinase.iupharkinase.pocket
011BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
112BARK2adrenergic, beta, receptor kinase 2GRK3GRKAGCBARKHumanP356261467RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD...
213GPRK4G protein-coupled receptor kinase 4GRK4GRKAGCGRKHumanP322981468RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT...
314GPRK5G protein-coupled receptor kinase 5GRK5GRKAGCGRKHumanP349471469RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT...
415GPRK6G protein-coupled receptor kinase 6GRK6GRKAGCGRKHumanP432501470RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT...
516RHOKG protein-coupled receptor kinase 1GRK1GRKAGCGRKHumanQ158351465RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT...
617GPRK7G protein-coupled receptor kinase 7GRK7GRKAGCGRKHumanQ8WTQ71471RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS...
\n", + "
" + ], + "text/plain": [ + " kinase.klifs_id kinase.klifs_name kinase.full_name \\\n", + "0 11 BARK1 adrenergic, beta, receptor kinase 1 \n", + "1 12 BARK2 adrenergic, beta, receptor kinase 2 \n", + "2 13 GPRK4 G protein-coupled receptor kinase 4 \n", + "3 14 GPRK5 G protein-coupled receptor kinase 5 \n", + "4 15 GPRK6 G protein-coupled receptor kinase 6 \n", + "5 16 RHOK G protein-coupled receptor kinase 1 \n", + "6 17 GPRK7 G protein-coupled receptor kinase 7 \n", + "\n", + " kinase.gene_name kinase.family kinase.group kinase.subfamily species.klifs \\\n", + "0 GRK2 GRK AGC BARK Human \n", + "1 GRK3 GRK AGC BARK Human \n", + "2 GRK4 GRK AGC GRK Human \n", + "3 GRK5 GRK AGC GRK Human \n", + "4 GRK6 GRK AGC GRK Human \n", + "5 GRK1 GRK AGC GRK Human \n", + "6 GRK7 GRK AGC GRK Human \n", + "\n", + " kinase.uniprot kinase.iuphar \\\n", + "0 P25098 1466 \n", + "1 P35626 1467 \n", + "2 P32298 1468 \n", + "3 P34947 1469 \n", + "4 P43250 1470 \n", + "5 Q15835 1465 \n", + "6 Q8WTQ7 1471 \n", + "\n", + " kinase.pocket \n", + "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMTYACFILD... \n", + "2 RVLGKGGFGEVCAYACKKLMALNEKRILEKVQRFVVSLAYACLVLT... \n", + "3 RVLGKGGFGEVCAYACKRLMALNEKQILEKVNQFVVNLAYACLVLT... \n", + "4 RVLGKGGFGEVCAYACKKLMALNEKQILEKVNRFVVSLAYACLVLT... \n", + "5 RVLGKGGFGEVSAYACKKLGAMVEKKILMKVHRFIVSLAYACLVMT... \n", + "6 RVLGKGGFGEVCAYACKKLMALLEKEILEKVSPFIVSLAYACLVMS... " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kinases = klifs_session.kinases.by_kinase_klifs_id(kinase_klifs_ids)\n", + "kinases" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e0e49df2-3d5c-43ce-b384-7a527569c4f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of GRK structures: 41\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
structure.klifs_idstructure.pdb_idstructure.alternate_modelstructure.chainspecies.klifs_xkinase.klifs_idkinase.klifs_name_xkinase.namesstructure.pocketligand.expo_id...kinase.klifs_name_ykinase.full_namekinase.gene_namekinase.familykinase.groupkinase.subfamilyspecies.klifs_ykinase.uniprotkinase.iupharkinase.pocket
094405wg5BAHuman11BARK1<NA>RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...ZSO...BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
182565uvc-AHuman11BARK1<NA>RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...8PV...BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
294375wg5AAHuman11BARK1<NA>RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...ZSO...BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
394385wg4BAHuman11BARK1<NA>RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...AFV...BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
494395wg3-AHuman11BARK1<NA>RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...AFM...BARK1adrenergic, beta, receptor kinase 1GRK2GRKAGCBARKHumanP250981466RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD...
\n", + "

5 rows × 54 columns

\n", + "
" + ], + "text/plain": [ + " structure.klifs_id structure.pdb_id structure.alternate_model \\\n", + "0 9440 5wg5 B \n", + "1 8256 5uvc - \n", + "2 9437 5wg5 A \n", + "3 9438 5wg4 B \n", + "4 9439 5wg3 - \n", + "\n", + " structure.chain species.klifs_x kinase.klifs_id kinase.klifs_name_x \\\n", + "0 A Human 11 BARK1 \n", + "1 A Human 11 BARK1 \n", + "2 A Human 11 BARK1 \n", + "3 A Human 11 BARK1 \n", + "4 A Human 11 BARK1 \n", + "\n", + " kinase.names structure.pocket \\\n", + "0 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "1 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "2 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "3 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "4 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "\n", + " ligand.expo_id ... kinase.klifs_name_y \\\n", + "0 ZSO ... BARK1 \n", + "1 8PV ... BARK1 \n", + "2 ZSO ... BARK1 \n", + "3 AFV ... BARK1 \n", + "4 AFM ... BARK1 \n", + "\n", + " kinase.full_name kinase.gene_name kinase.family \\\n", + "0 adrenergic, beta, receptor kinase 1 GRK2 GRK \n", + "1 adrenergic, beta, receptor kinase 1 GRK2 GRK \n", + "2 adrenergic, beta, receptor kinase 1 GRK2 GRK \n", + "3 adrenergic, beta, receptor kinase 1 GRK2 GRK \n", + "4 adrenergic, beta, receptor kinase 1 GRK2 GRK \n", + "\n", + " kinase.group kinase.subfamily species.klifs_y kinase.uniprot \\\n", + "0 AGC BARK Human P25098 \n", + "1 AGC BARK Human P25098 \n", + "2 AGC BARK Human P25098 \n", + "3 AGC BARK Human P25098 \n", + "4 AGC BARK Human P25098 \n", + "\n", + " kinase.iuphar kinase.pocket \n", + "0 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "1 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "2 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "3 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "4 1466 RIIGRGGFGEVYGYAMKCLLALNERIMLSLVSPFIVCMSYASFILD... \n", + "\n", + "[5 rows x 54 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "structures = klifs_session.structures.by_kinase_klifs_id(kinase_klifs_ids)\n", + "structures = pd.merge(\n", + " structures.drop([\"kinase.family\", \"kinase.group\"], axis=1), kinases, on=[\"kinase.klifs_id\"]\n", + ")\n", + "print(f\"Number of GRK structures: {len(structures)}\")\n", + "structures.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c9ba7827-1796-4a15-835f-3dcfb3f7f4e0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of KLIFS structures per kinase\n" + ] + }, + { + "data": { + "text/plain": [ + "kinase.gene_name\n", + "GRK2 29\n", + "GRK4 2\n", + "GRK5 4\n", + "GRK6 6\n", + "dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"Number of KLIFS structures per kinase\")\n", + "structures.groupby(\"kinase.gene_name\").size()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96309789-cd99-4e5a-ad39-628e2f5bd753", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/008_application_grk/README.md b/notebooks/008_application_grk/README.md new file mode 100644 index 0000000..0c0c958 --- /dev/null +++ b/notebooks/008_application_grk/README.md @@ -0,0 +1,3 @@ +## `001_grk_structures_in_klifs.ipynb` + +GRK structures in KLIFS