From dab0350527bcb3b63966510ec7a6c02077158883 Mon Sep 17 00:00:00 2001
From: Lily Wang <31115101+lilyminium@users.noreply.github.com>
Date: Fri, 22 Mar 2024 19:06:34 +1100
Subject: [PATCH] Fix conformer generation array typing and update CI (#97)

* remove oe from examples ci

* add alkane labelling as test

* expand mapped smiles allowed values

* add back ambertools

* set openeye to false in ci

* fix conformer array typing

* rm accidental data

* try just explicitly specifying ambertools

* update docs env

* just get away with docs env?

* update CHANGELOG
---
 .github/workflows/examples-ci.yaml            |  2 +-
 .github/workflows/gh-ci.yaml                  |  6 +-
 CHANGELOG.md                                  |  8 +++
 devtools/conda-envs/docs_env.yaml             |  2 +-
 devtools/conda-envs/examples_env.yaml         |  2 -
 devtools/conda-envs/test_env_dgl_false.yaml   |  1 +
 devtools/conda-envs/test_env_dgl_true.yaml    |  1 +
 .../train-gnn-notebook.ipynb                  |  2 +-
 openff/nagl/label/labels.py                   |  2 +-
 openff/nagl/tests/label/test_labels.py        | 59 ++++++++++++++++++-
 10 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/examples-ci.yaml b/.github/workflows/examples-ci.yaml
index a3d0115d..b5134dc9 100644
--- a/.github/workflows/examples-ci.yaml
+++ b/.github/workflows/examples-ci.yaml
@@ -34,7 +34,7 @@ jobs:
           python-version: ["3.9", "3.10", "3.11"]
           pydantic-version: ["2"]
           include-rdkit: [true]
-          include-openeye: [true]
+          include-openeye: [false]
           include-dgl: [true]
           exclude:
             # broken OpenMM build for Mac on 3.10
diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml
index ff349192..08d8da3d 100644
--- a/.github/workflows/gh-ci.yaml
+++ b/.github/workflows/gh-ci.yaml
@@ -42,9 +42,8 @@ jobs:
             # broken OpenMM build for Mac on 3.10
             - os: "macOS-latest"
               python-version: "3.10"
-            # no dgl for 3.12 yet on Mac
-            - include-dgl: true
-              python-version: "3.12"
+            # Can't support 3.12 on Mac yet
+            - python-version: "3.12"
               os: "macOS-latest"
             # no openeye for 3.12 yet
             - include-openeye: true
@@ -191,7 +190,6 @@ jobs:
         conda activate openff-nagl
         conda list
     
-        mamba env update --name openff-nagl --file devtools/conda-envs/test_env_dgl_false.yaml
         mamba env update --name openff-nagl --file devtools/conda-envs/docs_env.yaml
         python --version
         python -m pip install . --no-deps
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d5a5d7f..61b90f46 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,14 @@ The rules for this file:
   * accompany each entry with github issue/PR number (Issue #xyz)
 -->
 
+## v0.3.6 -- 2024-03-22
+
+### Authors
+- @lilyminium
+
+### Fixed
+- Fixed typing of conformer generation from RDKit (PR #97)
+
 ## v0.3.5 -- 2024-03-21
 
 ### Authors
diff --git a/devtools/conda-envs/docs_env.yaml b/devtools/conda-envs/docs_env.yaml
index b6cacacb..33dd3c3d 100644
--- a/devtools/conda-envs/docs_env.yaml
+++ b/devtools/conda-envs/docs_env.yaml
@@ -15,7 +15,7 @@ dependencies:
   - rich
 
   # chemistry
-  - openff-toolkit-base ==0.11.1
+  - openff-toolkit-base >=0.11.1
   - openff-units
   - pydantic <2.0
   - rdkit
diff --git a/devtools/conda-envs/examples_env.yaml b/devtools/conda-envs/examples_env.yaml
index 383777d3..cce6a956 100644
--- a/devtools/conda-envs/examples_env.yaml
+++ b/devtools/conda-envs/examples_env.yaml
@@ -1,6 +1,5 @@
 name: openff-nagl-test
 channels:
-  - openeye
   - conda-forge
   - defaults
 dependencies:
@@ -25,7 +24,6 @@ dependencies:
   - openff-recharge
   - pydantic <3
   - rdkit
-  - openeye-toolkits
 
   # database
   - pyarrow
diff --git a/devtools/conda-envs/test_env_dgl_false.yaml b/devtools/conda-envs/test_env_dgl_false.yaml
index 33af31fc..4056fe2e 100644
--- a/devtools/conda-envs/test_env_dgl_false.yaml
+++ b/devtools/conda-envs/test_env_dgl_false.yaml
@@ -21,6 +21,7 @@ dependencies:
   - pydantic <3
   - rdkit
   - scipy
+  - ambertools
 
   # database
   - pyarrow
diff --git a/devtools/conda-envs/test_env_dgl_true.yaml b/devtools/conda-envs/test_env_dgl_true.yaml
index bbc51434..3b7cb7a1 100644
--- a/devtools/conda-envs/test_env_dgl_true.yaml
+++ b/devtools/conda-envs/test_env_dgl_true.yaml
@@ -22,6 +22,7 @@ dependencies:
   - pydantic <3
   - rdkit
   - scipy
+  - ambertools
 
   # database
   - pyarrow
diff --git a/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb b/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb
index 97d827f6..e36e508c 100644
--- a/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb
+++ b/examples/train-multi-objective-gnn/train-gnn-notebook.ipynb
@@ -1528,7 +1528,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
diff --git a/openff/nagl/label/labels.py b/openff/nagl/label/labels.py
index 13851ea8..89deafd6 100644
--- a/openff/nagl/label/labels.py
+++ b/openff/nagl/label/labels.py
@@ -98,7 +98,7 @@ def apply(
             conformers = np.ravel([
                 conformer.m_as(unit.angstrom)
                 for conformer in mol.conformers
-            ])
+            ]).astype(float)
             data[self.conformer_column].append(conformers)
             data[self.n_conformer_column].append(len(mol.conformers))
         
diff --git a/openff/nagl/tests/label/test_labels.py b/openff/nagl/tests/label/test_labels.py
index 51cbe46c..bb1fbf53 100644
--- a/openff/nagl/tests/label/test_labels.py
+++ b/openff/nagl/tests/label/test_labels.py
@@ -100,6 +100,63 @@ def test_label_with_conformers_on_fly(self, small_dataset):
         columns = ["mapped_smiles", "conformers", "n_conformers", "charges"]
         assert small_dataset.dataset.schema.names == columns
 
+    def test_label_alkane_dataset(self):
+        # test conformer generation followed by charge labelling,
+        # mirroring the workflow used in the examples
+
+        training_alkanes = [
+            'C',
+            'CC',
+            'CCC',
+            'CCCC',
+            'CC(C)C',
+            'CCCCC',
+            'CC(C)CC',
+            'CCCCCC',
+            'CC(C)CCC',
+            'CC(CC)CC',
+        ]
+
+        training_dataset = LabelledDataset.from_smiles(
+            "training_data",
+            training_alkanes,
+            mapped=False,
+            overwrite_existing=True,
+        )
+        training_df = training_dataset.to_pandas()
+        assert training_df.mapped_smiles[0] in (
+            "[H:2][C:1]([H:3])([H:4])[H:5]",
+            "[C:1]([H:2])([H:3])([H:4])[H:5]"
+        )
+
+        label_conformers = LabelConformers(
+            # create a new 'conformers' column with the output conformers
+            conformer_column="conformers",
+            # create a new 'n_conformers' column with the number of conformers
+            n_conformer_column="n_conformers",
+            n_conformer_pool=500, # initially generate 500 conformers
+            n_conformers=10, # prune to max 10 conformers
+            rms_cutoff=0.05,
+        )
+
+        label_am1_charges = LabelCharges(
+            charge_method="am1-mulliken", # AM1
+            # use previously generated conformers instead of new ones
+            use_existing_conformers=True,
+            # use the 'conformers' column as input for charge assignment
+            conformer_column="conformers",
+            # write generated charges to 'target-am1-charges' column
+            charge_column="target-am1-charges",
+        )
+
+        labellers = [
+            label_conformers, # generate initial conformers,
+            label_am1_charges,
+        ]
+
+        training_dataset.apply_labellers(labellers)
+
+
 
 class TestLabelMultipleDipoles:
     
@@ -174,4 +231,4 @@ def test_apply_label(self, dataset_with_conformers_and_charges):
 
         calculated_esps = pydict["esps"]
         for esps, lengths in zip(calculated_esps, calculated_esp_lengths):
-            assert len(esps) == sum(lengths)
\ No newline at end of file
+            assert len(esps) == sum(lengths)