Skip to content

Commit

Permalink
Made num_samples actually be number of samples in STONED (#67)
Browse files Browse the repository at this point in the history
  • Loading branch information
whitead authored Jan 17, 2022
1 parent 8a96722 commit 4e280ab
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 11 deletions.
6 changes: 5 additions & 1 deletion docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
Change Log
==========

v0.5.2 (2021-1-4)
v0.6.0 (2022-1-17)
-------------------
* Changed behavior of num_samples so that it is not affected by mutation count in STONED

v0.5.2 (2022-1-4)
-------------------
* Fixed SMILES escaping in URL in chemed

Expand Down
14 changes: 8 additions & 6 deletions exmol/exmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def run_stoned(
:param s: SMILES string to start from
:param fp_type: Fingerprint type
:param num_samples: Number of molecules to generate per mutation
:param num_samples: Number of total molecules to generate
:param max_mutations: Maximum number of mutations
:param min_mutations: Minimum number of mutations
:param alphabet: Alphabet to use for mutations, typically from :func:`get_basic_alphabet()`
Expand All @@ -85,8 +85,9 @@ def run_stoned(
if mol == None:
raise Exception("Invalid starting structure encountered")

# Divide by len(num_mutation_ls) so that the total after sampling is about num_samples
randomized_smile_orderings = [
stoned.randomize_smiles(mol) for _ in range(num_samples)
stoned.randomize_smiles(mol) for _ in range(num_samples // len(num_mutation_ls))
]

# Convert all the molecules to SELFIES
Expand Down Expand Up @@ -242,15 +243,16 @@ def sample_space(
) -> List[Example]:
"""Sample chemical space around given SMILES
This will evaluate the given function and run the :func:`run_stoned` function over chemical space around molecule.
This will evaluate the given function and run the :func:`run_stoned` function over chemical space around molecule. ``num_samples`` will be
set to 3,000 by default if using STONED and 150 if using ``chemed``.
:param origin_smiles: starting SMILES
:param f: A function which takes in SMILES and SELFIES and returns predicted value. Assumed to work with lists of SMILES/SELFIES unless `batched = False`
:param batched: If `f` is batched
:param preset: Can be wide, medium, or narrow. Determines how far across chemical space is sampled. Try `"chemed"` experimental preset to only sample commercially available compounds.
:param data: If not None and preset is `"custom"` will use this data instead of generating new ones.
:param method_kwargs: More control over STONED, CHEMED and CUSTOM can be set here. See :func:`run_stoned`, :func:`run_chemed` and :func:`run_custom`
:param num_samples: Number of desired samples. Can be set in `method_kwargs` (overrides) or here. `None` means default from preset.
:param num_samples: Number of desired samples. Can be set in `method_kwargs` (overrides) or here. `None` means default for preset
:param stoned_kwargs: Backwards compatible alias for `method_kwargs`
:return: List of generated :obj:`Example`
"""
Expand All @@ -276,15 +278,15 @@ def batched_f(sm, se):
if method_kwargs is None:
method_kwargs = {}
if preset == "medium":
method_kwargs["num_samples"] = 1500 if num_samples is None else num_samples
method_kwargs["num_samples"] = 3000 if num_samples is None else num_samples
method_kwargs["max_mutations"] = 2
method_kwargs["alphabet"] = get_basic_alphabet()
elif preset == "narrow":
method_kwargs["num_samples"] = 3000 if num_samples is None else num_samples
method_kwargs["max_mutations"] = 1
method_kwargs["alphabet"] = get_basic_alphabet()
elif preset == "wide":
method_kwargs["num_samples"] = 600 if num_samples is None else num_samples
method_kwargs["num_samples"] = 3000 if num_samples is None else num_samples
method_kwargs["max_mutations"] = 5
method_kwargs["alphabet"] = sf.get_semantic_robust_alphabet()
elif preset == "chemed":
Expand Down
2 changes: 1 addition & 1 deletion exmol/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.2"
__version__ = "0.6.0"
14 changes: 11 additions & 3 deletions tests/test_exmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,24 @@ def test_sanitize_smiles():


# TODO let STONED people write these when they finish their repo


def test_run_stones():
def test_run_stoned():
result = exmol.run_stoned(
"N#CC=CC(C(=O)NCC1=CC=CC=C1C(=O)N)(C)CC2=CC=C(F)C=C2CC",
num_samples=10,
max_mutations=1,
)
# Can get duplicates
assert len(result[0]) >= 0
assert abs(len(result[0]) - 10) <= 1

result = exmol.run_stoned(
"N#CC=CC(C(=O)NCC1=CC=CC=C1C(=O)N)(C)CC2=CC=C(F)C=C2CC",
num_samples=12,
max_mutations=3,
)
# Can get duplicates
assert len(result[0]) >= 0
assert abs(len(result[0]) - 12) <= 1


def test_run_chemed():
Expand Down

0 comments on commit 4e280ab

Please sign in to comment.