From dab0350527bcb3b63966510ec7a6c02077158883 Mon Sep 17 00:00:00 2001 From: Lily Wang <31115101+lilyminium@users.noreply.github.com> Date: Fri, 22 Mar 2024 19:06:34 +1100 Subject: [PATCH] Fix conformer generation array typing and update CI (#97) * remove oe from examples ci * add alkane labelling as test * expand mapped smiles allowed values * add back ambertools * set openeye to false in ci * fix conformer array typing * rm accidental data * try just explicitly specifying ambertools * update docs env * just get away with docs env? * update CHANGELOG --- .github/workflows/examples-ci.yaml | 2 +- .github/workflows/gh-ci.yaml | 6 +- CHANGELOG.md | 8 +++ devtools/conda-envs/docs_env.yaml | 2 +- devtools/conda-envs/examples_env.yaml | 2 - devtools/conda-envs/test_env_dgl_false.yaml | 1 + devtools/conda-envs/test_env_dgl_true.yaml | 1 + .../train-gnn-notebook.ipynb | 2 +- openff/nagl/label/labels.py | 2 +- openff/nagl/tests/label/test_labels.py | 59 ++++++++++++++++++- 10 files changed, 74 insertions(+), 11 deletions(-) diff --git a/.github/workflows/examples-ci.yaml b/.github/workflows/examples-ci.yaml index a3d0115d..b5134dc9 100644 --- a/.github/workflows/examples-ci.yaml +++ b/.github/workflows/examples-ci.yaml @@ -34,7 +34,7 @@ jobs: python-version: ["3.9", "3.10", "3.11"] pydantic-version: ["2"] include-rdkit: [true] - include-openeye: [true] + include-openeye: [false] include-dgl: [true] exclude: # broken OpenMM build for Mac on 3.10 diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index ff349192..08d8da3d 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -42,9 +42,8 @@ jobs: # broken OpenMM build for Mac on 3.10 - os: "macOS-latest" python-version: "3.10" - # no dgl for 3.12 yet on Mac - - include-dgl: true - python-version: "3.12" + # Can't support 3.12 on Mac yet + - python-version: "3.12" os: "macOS-latest" # no openeye for 3.12 yet - include-openeye: true @@ -191,7 +190,6 @@ jobs: conda activate 
openff-nagl conda list - mamba env update --name openff-nagl --file devtools/conda-envs/test_env_dgl_false.yaml mamba env update --name openff-nagl --file devtools/conda-envs/docs_env.yaml python --version python -m pip install . --no-deps diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d5a5d7f..61b90f46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,14 @@ The rules for this file: * accompany each entry with github issue/PR number (Issue #xyz) --> +## v0.3.6 -- 2024-03-22 + +### Authors +- @lilyminium + +### Fixed +- Fixed typing of conformer generation from RDKit (PR #97) + ## v0.3.5 -- 2024-03-21 ### Authors diff --git a/devtools/conda-envs/docs_env.yaml b/devtools/conda-envs/docs_env.yaml index b6cacacb..33dd3c3d 100644 --- a/devtools/conda-envs/docs_env.yaml +++ b/devtools/conda-envs/docs_env.yaml @@ -15,7 +15,7 @@ dependencies: - rich # chemistry - - openff-toolkit-base ==0.11.1 + - openff-toolkit-base >=0.11.1 - openff-units - pydantic <2.0 - rdkit diff --git a/devtools/conda-envs/examples_env.yaml b/devtools/conda-envs/examples_env.yaml index 383777d3..cce6a956 100644 --- a/devtools/conda-envs/examples_env.yaml +++ b/devtools/conda-envs/examples_env.yaml @@ -1,6 +1,5 @@ name: openff-nagl-test channels: - - openeye - conda-forge - defaults dependencies: @@ -25,7 +24,6 @@ dependencies: - openff-recharge - pydantic <3 - rdkit - - openeye-toolkits # database - pyarrow diff --git a/devtools/conda-envs/test_env_dgl_false.yaml b/devtools/conda-envs/test_env_dgl_false.yaml index 33af31fc..4056fe2e 100644 --- a/devtools/conda-envs/test_env_dgl_false.yaml +++ b/devtools/conda-envs/test_env_dgl_false.yaml @@ -21,6 +21,7 @@ dependencies: - pydantic <3 - rdkit - scipy + - ambertools # database - pyarrow diff --git a/devtools/conda-envs/test_env_dgl_true.yaml b/devtools/conda-envs/test_env_dgl_true.yaml index bbc51434..3b7cb7a1 100644 --- a/devtools/conda-envs/test_env_dgl_true.yaml +++ b/devtools/conda-envs/test_env_dgl_true.yaml @@ -22,6 +22,7 @@ 
dependencies: - pydantic <3 - rdkit - scipy + - ambertools # database - pyarrow diff --git a/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb b/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb index 97d827f6..e36e508c 100644 --- a/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb +++ b/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb @@ -1528,7 +1528,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/openff/nagl/label/labels.py b/openff/nagl/label/labels.py index 13851ea8..89deafd6 100644 --- a/openff/nagl/label/labels.py +++ b/openff/nagl/label/labels.py @@ -98,7 +98,7 @@ def apply( conformers = np.ravel([ conformer.m_as(unit.angstrom) for conformer in mol.conformers - ]) + ]).astype(float) data[self.conformer_column].append(conformers) data[self.n_conformer_column].append(len(mol.conformers)) diff --git a/openff/nagl/tests/label/test_labels.py b/openff/nagl/tests/label/test_labels.py index 51cbe46c..bb1fbf53 100644 --- a/openff/nagl/tests/label/test_labels.py +++ b/openff/nagl/tests/label/test_labels.py @@ -100,6 +100,63 @@ def test_label_with_conformers_on_fly(self, small_dataset): columns = ["mapped_smiles", "conformers", "n_conformers", "charges"] assert small_dataset.dataset.schema.names == columns + def test_label_alkane_dataset(self): + # test conformer generation and labelling + # as in examples + + training_alkanes = [ + 'C', + 'CC', + 'CCC', + 'CCCC', + 'CC(C)C', + 'CCCCC', + 'CC(C)CC', + 'CCCCCC', + 'CC(C)CCC', + 'CC(CC)CC', + ] + + training_dataset = LabelledDataset.from_smiles( + "training_data", + training_alkanes, + mapped=False, + overwrite_existing=True, + ) + training_df = training_dataset.to_pandas() + assert training_df.mapped_smiles[0] in ( + "[H:2][C:1]([H:3])([H:4])[H:5]", + "[C:1]([H:2])([H:3])([H:4])[H:5]" + ) + + label_conformers = LabelConformers( + # create a new 'conformers' with 
output conformers + conformer_column="conformers", + # create a new 'n_conformers' with number of conformers + n_conformer_column="n_conformers", + n_conformer_pool=500, # initially generate 500 conformers + n_conformers=10, # prune to max 10 conformers + rms_cutoff=0.05, + ) + + label_am1_charges = LabelCharges( + charge_method="am1-mulliken", # AM1 + # use previously generated conformers instead of new ones + use_existing_conformers=True, + # use the 'conformers' column as input for charge assignment + conformer_column="conformers", + # write generated charges to 'target-am1-charges' column + charge_column="target-am1-charges", + ) + + labellers = [ + label_conformers, # generate initial conformers, + label_am1_charges, + ] + + training_dataset.apply_labellers(labellers) + + class TestLabelMultipleDipoles: @@ -174,4 +231,4 @@ def test_apply_label(self, dataset_with_conformers_and_charges): calculated_esps = pydict["esps"] for esps, lengths in zip(calculated_esps, calculated_esp_lengths): - assert len(esps) == sum(lengths) \ No newline at end of file + assert len(esps) == sum(lengths)