diff --git a/api/c/indigo/indigo.h b/api/c/indigo/indigo.h index 80daa7b466..2103849b6b 100644 --- a/api/c/indigo/indigo.h +++ b/api/c/indigo/indigo.h @@ -672,6 +672,8 @@ CEXPORT double indigoTPSA(int molecule, int includeSP); CEXPORT int indigoNumRotatableBonds(int molecule); CEXPORT int indigoNumHydrogenBondAcceptors(int molecule); CEXPORT int indigoNumHydrogenBondDonors(int molecule); +CEXPORT double indigoCLogP(int molecule); +CEXPORT double indigoCMolarRefractivity(int molecule); CEXPORT const char* indigoCanonicalSmiles(int molecule); CEXPORT const char* indigoLayeredCode(int molecule); diff --git a/api/c/indigo/src/indigo_calc.cpp b/api/c/indigo/src/indigo_calc.cpp index 8473b32a84..d38bc05b7a 100644 --- a/api/c/indigo/src/indigo_calc.cpp +++ b/api/c/indigo/src/indigo_calc.cpp @@ -7,6 +7,7 @@ #include "indigo_molecule.h" #include "indigo_reaction.h" +#include "molecule/crippen.h" IndigoMoleculeGross::IndigoMoleculeGross() : IndigoObject(GROSS_MOLECULE) { @@ -191,3 +192,33 @@ CEXPORT int indigoNumHydrogenBondDonors(const int molecule) } INDIGO_END(-1); } + +CEXPORT double indigoCLogP(const int molecule) +{ + INDIGO_BEGIN + { + auto& obj = self.getObject(molecule); + if (IndigoMolecule::is(obj)) + { + auto& mol = obj.getMolecule(); + return Crippen::logP(mol); + } + throw IndigoError("incorrect object type for cLogP calculation: %s, should be molecule", obj.debugInfo()); + } + INDIGO_END(-1); +} + +CEXPORT double indigoCMolarRefractivity(const int molecule) +{ + INDIGO_BEGIN + { + auto& obj = self.getObject(molecule); + if (IndigoMolecule::is(obj)) + { + auto& mol = obj.getMolecule(); + return Crippen::molarRefractivity(mol); + } + throw IndigoError("incorrect object type for molar refractivity calculation: %s, should be molecule", obj.debugInfo()); + } + INDIGO_END(-1); +} diff --git a/api/cpp/src/IndigoMolecule.cpp b/api/cpp/src/IndigoMolecule.cpp index 7bb9136dea..389dcb11d7 100644 --- a/api/cpp/src/IndigoMolecule.cpp +++ b/api/cpp/src/IndigoMolecule.cpp @@ -56,3 +56,13 
@@ int IndigoMolecule::numHydrogenBondDonors() const { return session()->_checkResult(indigoNumHydrogenBondDonors(id())); } + +double IndigoMolecule::cLogP() const +{ + return session()->_checkResultFloat(indigoCLogP(id())); +} + +double IndigoMolecule::cMolarRefractivity() const +{ + return session()->_checkResultFloat(indigoCMolarRefractivity(id())); +} diff --git a/api/cpp/src/IndigoMolecule.h b/api/cpp/src/IndigoMolecule.h index f163eb08d0..29ca4e7f54 100644 --- a/api/cpp/src/IndigoMolecule.h +++ b/api/cpp/src/IndigoMolecule.h @@ -39,6 +39,8 @@ namespace indigo_cpp int numRotatableBonds() const; int numHydrogenBondAcceptors() const; int numHydrogenBondDonors() const; + double cLogP() const; + double cMolarRefractivity() const; }; using IndigoMoleculeSPtr = std::shared_ptr; diff --git a/api/cpp/tests/basic/molecule.cpp b/api/cpp/tests/basic/molecule.cpp index ac872f8be3..fa2f41736c 100644 --- a/api/cpp/tests/basic/molecule.cpp +++ b/api/cpp/tests/basic/molecule.cpp @@ -107,3 +107,29 @@ TEST(Molecule, numHydrogenBondDonors) EXPECT_EQ(3, molecule.numHydrogenBondDonors()); } } + +TEST(Molecule, cLogP) +{ + const auto& session = IndigoSession::create(); + { + const auto& molecule = session->loadMolecule(CAFFEINE); + EXPECT_NEAR(0.062, molecule.cLogP(), 1e-2); + } + { + const auto& molecule = session->loadMolecule(SULFASALAZINE); + EXPECT_NEAR(3.7, molecule.cLogP(), 1e-2); + } +} + +TEST(Molecule, cMolarRefractivity) +{ + const auto& session = IndigoSession::create(); + { + const auto& molecule = session->loadMolecule(CAFFEINE); + EXPECT_NEAR(49.1, molecule.cMolarRefractivity(), 1e-2); + } + { + const auto& molecule = session->loadMolecule(SULFASALAZINE); + EXPECT_NEAR(100.73, molecule.cMolarRefractivity(), 1e-2); + } +} diff --git a/api/cpp/tests/common.h.in b/api/cpp/tests/common.h.in index 34cfd2f6a9..e204eaa464 100644 --- a/api/cpp/tests/common.h.in +++ b/api/cpp/tests/common.h.in @@ -3,6 +3,7 @@ #include #define METHANE "C" +#define BENZENE "C1=CC=CC=C1" #define 
CAFFEINE "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" #define SULFASALAZINE "C1=CC=NC(=C1)NS(=O)(=O)C2=CC=C(C=C2)N=NC3=CC(=C(C=C3)O)C(=O)O" diff --git a/api/dotnet/src/IndigoLib.cs b/api/dotnet/src/IndigoLib.cs index fa34d1e877..5abfa1f3d6 100644 --- a/api/dotnet/src/IndigoLib.cs +++ b/api/dotnet/src/IndigoLib.cs @@ -822,6 +822,12 @@ public static extern int indigoSetSGroupBrackets(int sgroup, int brk_style, floa [DllImport("indigo"), SuppressUnmanagedCodeSecurity] public static extern int indigoNumHydrogenBondDonors(int molecule); + [DllImport("indigo"), SuppressUnmanagedCodeSecurity] + public static extern double indigoCLogP(int molecule); + + [DllImport("indigo"), SuppressUnmanagedCodeSecurity] + public static extern double indigoCMolarRefractivity(int molecule); + [DllImport("indigo"), SuppressUnmanagedCodeSecurity] public static extern byte* indigoCanonicalSmiles(int molecule); diff --git a/api/dotnet/src/IndigoObject.cs b/api/dotnet/src/IndigoObject.cs index ae43cc950c..24768ff70d 100644 --- a/api/dotnet/src/IndigoObject.cs +++ b/api/dotnet/src/IndigoObject.cs @@ -1415,6 +1415,18 @@ public int numHydrogenBondDonors() return dispatcher.checkResult(IndigoLib.indigoNumHydrogenBondDonors(self)); } + public double cLogP() + { + dispatcher.setSessionID(); + return dispatcher.checkResult(IndigoLib.indigoCLogP(self)); + } + + public double cMolarRefractivity() + { + dispatcher.setSessionID(); + return dispatcher.checkResult(IndigoLib.indigoCMolarRefractivity(self)); + } + public string canonicalSmiles() { dispatcher.setSessionID(); diff --git a/api/java/indigo/src/main/java/com/epam/indigo/IndigoLib.java b/api/java/indigo/src/main/java/com/epam/indigo/IndigoLib.java index 843788c4ff..c850baf6ee 100644 --- a/api/java/indigo/src/main/java/com/epam/indigo/IndigoLib.java +++ b/api/java/indigo/src/main/java/com/epam/indigo/IndigoLib.java @@ -605,6 +605,10 @@ int indigoSetSGroupBrackets( int indigoNumHydrogenBondDonors(int molecule); + double indigoCLogP(int molecule); + + double 
indigoCMolarRefractivity(int molecule); + Pointer indigoCanonicalSmiles(int molecule); Pointer indigoLayeredCode(int molecule); diff --git a/api/java/indigo/src/main/java/com/epam/indigo/IndigoObject.java b/api/java/indigo/src/main/java/com/epam/indigo/IndigoObject.java index fbf3ed56f3..887a8d0f43 100644 --- a/api/java/indigo/src/main/java/com/epam/indigo/IndigoObject.java +++ b/api/java/indigo/src/main/java/com/epam/indigo/IndigoObject.java @@ -840,6 +840,16 @@ public int numHydrogenBondDonors() { return Indigo.checkResult(this, lib.indigoNumHydrogenBondDonors(self)); } + public double cLogP() { + dispatcher.setSessionID(); + return Indigo.checkResultDouble(this, lib.indigoCLogP(self)); + } + + public double cMolarRefractivity() { + dispatcher.setSessionID(); + return Indigo.checkResultDouble(this, lib.indigoCMolarRefractivity(self)); + } + public String canonicalSmiles() { dispatcher.setSessionID(); return Indigo.checkResultString(this, lib.indigoCanonicalSmiles(self)); diff --git a/api/python/indigo/__init__.py b/api/python/indigo/__init__.py index 8ce4424336..4c4997e50e 100644 --- a/api/python/indigo/__init__.py +++ b/api/python/indigo/__init__.py @@ -39,7 +39,6 @@ from indigo.exceptions import IndigoException from indigo.hybridization import get_hybridization -from indigo.logp import get_logp, get_mr from indigo.salts import SALTS DECODE_ENCODING = "utf-8" @@ -2584,21 +2583,27 @@ def iterateBonds(self): ), ) - def logP(self) -> float: - """Molecule method returns calculated logP value + def cLogP(self): + """Molecule method returns calculated Crippen logP value Returns: float: calculated logP value of the molecule """ - return get_logp(self) + self.dispatcher._setSessionId() + return self.dispatcher._checkResultFloat( + Indigo._lib.indigoCLogP(self.id) + ) - def molarRefractivity(self) -> float: - """Molecule method returns calculated molar refractivity + def cMolarRefractivity(self): + """Molecule method returns calculated Crippen molar refractivity Returns: 
float: calculated value of molar refractivity """ - return get_mr(self) + self.dispatcher._setSessionId() + return self.dispatcher._checkResultFloat( + Indigo._lib.indigoCMolarRefractivity(self.id) + ) def bondOrder(self): """Bond method returns bond order @@ -5068,6 +5073,10 @@ def __init__(self, path=None): Indigo._lib.indigoNumHydrogenBondAcceptors.argtypes = [c_int] Indigo._lib.indigoNumHydrogenBondDonors.restype = c_int Indigo._lib.indigoNumHydrogenBondDonors.argtypes = [c_int] + Indigo._lib.indigoCLogP.restype = c_double + Indigo._lib.indigoCLogP.argtypes = [c_int] + Indigo._lib.indigoCMolarRefractivity.restype = c_double + Indigo._lib.indigoCMolarRefractivity.argtypes = [c_int] Indigo._lib.indigoCanonicalSmiles.restype = c_char_p Indigo._lib.indigoCanonicalSmiles.argtypes = [c_int] Indigo._lib.indigoCanonicalSmarts.restype = c_char_p diff --git a/api/python/indigo/logp.py b/api/python/indigo/logp.py deleted file mode 100644 index 04d3d21190..0000000000 --- a/api/python/indigo/logp.py +++ /dev/null @@ -1,297 +0,0 @@ -# Implementation of the method from "Prediction of Physicochemical Parameters -# by Atomic Contributions" paper -# by Scott A. Wildman and Gordon M. 
Crippen - - -import csv -from collections import Counter, defaultdict -from typing import TYPE_CHECKING, Dict, Optional - -if TYPE_CHECKING: - from indigo import IndigoObject - -atom_types_table = { - "C1": [["[CH4]", "[CH3]C", "[CH2](C)C"], 0.1441, 2.503], - "C2": [["[CH](C)(C)C", "[C](C)(C)(C)C"], 0.0, 2.433], - "C3": [ - ["[CH3][N,O,P,S,F,Cl,Br,I]", "[CH2X4]([N,O,P,S,F,Cl,Br,I])[A;!#1]"], - -0.2035, - 2.753, - ], - "C4": [ - [ - "[CH1X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])[A;!#1]", - "[CH0X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])([A;!#1])[A;!#1]", - ], - -0.2051, - 2.731, - ], - "C5": [["[C]=[!C;A;!#1]"], -0.2783, 5.007], - "C6": [ - [ - "[CH2]=C", - "[CH1](=C)[A;!#1]", - "[CH0](=C)([A;!#1])[A;!#1]", - "[C](=C)=C", - ], - 0.1551, - 3.513, - ], - "C7": [["[CX2]#[A;!#1]"], 0.0017, 3.888], - "C8": [["[CH3]c"], 0.08452, 2.464], - "C9": [["[CH3]a"], -0.1444, 2.412], - "C10": [["[CH2X4]a"], -0.0516, 2.488], - "C11": [["[CHX4]a"], 0.1193, 2.582], - "C12": [["[CH0X4]a"], -0.0967, 2.576], - "C13": [["[cH0]-[A;!C;!N;!O;!S;!F;!Cl;!Br;!I;!#1]"], -0.5443, 4.041], - "C14": [["[c][#9]"], 0.0, 3.257], - "C15": [["[c][#17]"], 0.245, 3.564], - "C16": [["[c][#35]"], 0.198, 3.18], - "C17": [["[c][#53]"], 0.0, 3.104], - "C18": [["[cH]"], 0.1581, 3.35], - "C19": [["[c](:a)(:a):a"], 0.2955, 4.346], - "C20": [["[c](:a)(:a)-a"], 0.2713, 3.904], - "C21": [["[c](:a)(:a)-C"], 0.136, 3.509], - "C22": [["[c](:a)(:a)-N"], 0.4619, 4.067], - "C23": [["[c](:a)(:a)-O"], 0.5437, 3.853], - "C24": [["[c](:a)(:a)-S"], 0.1893, 2.673], - "C25": [["[c](:a)(:a)=[C,N,O]"], -0.8186, 3.135], - "C26": [ - ["[C](=C)(a)[A;!#1]", "[C](=C)(c)a", "[CH1](=C)a", "[C]=c"], - 0.264, - 4.305, - ], - "C27": [["[CX4][A;!C;!N;!O;!P;!S;!F;!Cl;!Br;!I;!#1]"], 0.2148, 2.693], - "C": [["[#6]"], 0.08129, 3.243], - "H1": [["[#1][#6,#1]"], 0.123, 1.057], - "H2": [ - ["[#1]O[CX4,c]", "[#1]O[!#6;!#7;!#8;!#16]", "[#1][!#6;!#7;!#8]"], - -0.2677, - 1.395, - ], - "H3": [["[#1][#7]", "[#1]O[#7]"], 0.2142, 0.9627], - "H4": [["[#1]OC=[#6,#7,O,S]", 
"[#1]O[O,S]"], 0.298, 1.805], - "H": [["[#1]"], 0.1125, 1.112], - "N1": [["[NH2+0][A;!#1]"], -1.019, 2.262], - "N2": [["[NH+0]([A;!#1])[A;!#1]"], -0.7096, 2.173], - "N3": [["[NH2+0]a"], -1.027, 2.827], - "N4": [["[NH1+0]([!#1;A,a])a"], -0.5188, 3.0], - "N5": [["[NH+0]=[!#1;A,a]"], 0.08387, 1.757], - "N6": [["[N+0](=[!#1;A,a])[!#1;A,a]"], 0.1836, 2.428], - "N7": [["[N+0]([A;!#1])([A;!#1])[A;!#1]"], -0.3187, 1.839], - "N8": [["[N+0](a)([!#1;A,a])[A;!#1]", "[N+0](a)(a)a"], -0.4458, 2.819], - "N9": [["[N+0]#[A;!#1]"], 0.01508, 1.725], - "N10": [["[NH3,NH2,NH;+,+2,+3]"], -1.95, "nan"], - "N11": [["[n+0]"], -0.3239, 2.202], - "N12": [["[n;+,+2,+3]"], -1.119, "nan"], - "N13": [ - [ - "[NH0;+,+2,+3]([A;!#1])([A;!#1])([A;!#1])[A;!#1]", - "[NH0;+,+2,+3](=[A;!#1])([A;!#1])[!#1;A,a]", - "[NH0;+,+2,+3](=[#6])=[#7]", - ], - -0.3396, - 0.2604, - ], - "N14": [ - ["[N;+,+2,+3]#[A;!#1]", "[N;-,-2,-3]", "[N;+,+2,+3](=[N;-,-2,-3])=N"], - 0.2887, - 3.359, - ], - "N": [["[#7]"], -0.4806, 2.134], - "O1": [["[o]"], 0.1552, 1.08], - "O2": [["[OH,OH2]"], -0.2893, 0.8238], - "O3": [["[O]([A;!#1])[A;!#1]"], -0.0684, 1.085], - "O4": [["[O](a)[!#1;A,a]"], -0.4195, 1.182], - "O5": [["[O]=[#7,#8]", "[OX1;-,-2,-3][#7]"], 0.0335, 3.367], - "O6": [["[OX1;-,-2,-2][#16]", "[O;-0]=[#16;-0]"], -0.3339, 0.7774], - "O12": [["[O-]C(=O)"], -1.326, "nan"], - "O7": [["[OX1;-,-2,-3][!#1;!N;!S]"], -1.189, 0.0], - "O8": [["[O]=c"], 0.1788, 3.135], - "O9": [ - [ - "[O]=[CH]C", - "[O]=C(C)([A;!#1])", - "[O]=[CH][N,O]", - "[O]=[CH2]", - "[O]=[CX2]=O", - ], - -0.1526, - 0.0, - ], - "O10": [ - ["[O]=[CH]c", "[O]=C([C,c])[a;!#1]", "[O]=C(c)[A;!#1]"], - 0.1129, - 0.2215, - ], - "O11": [["[O]=C([!#1;!#6])[!#1;!#6]"], 0.4833, 0.389], - "O": [["[#8]"], -0.1188, 0.6865], - "F": [["[#9-0]"], 0.4202, 1.108], - "Cl": [["[#17-0]"], 0.6895, 5.853], - "Br": [["[#35-0]"], 0.8456, 8.927], - "I": [["[#53-0]"], 0.8857, 14.02], - "F2": [["[#9-*]"], -2.996, "nan"], - "Cl2": [["[#17-*]"], -2.996, "nan"], - "Br2": [["[#35-*]"], -2.996, 
"nan"], - "I2": [["[#53-*]", "[#53+*]"], -2.996, "nan"], - "P": [["[#15]"], 0.8612, 6.92], - "S2": [ - ["[S;-,-2,-3,-4,+1,+2,+3,+5,+6]", "[S-0]=[N,O,P,S]"], - -0.0024, - 7.365, - ], - "S1": [["[S;A]"], 0.6482, 7.591], - "S3": [["[s;a]"], 0.6237, 6.691], - "Me1": [ - [ - "[#3,#11,#19,#37,#55]", - "[#4,#12,#20,#38,#56]", - "[#5,#13,#31,#49,#81]", - "[#14,#32,#50,#82]", - "[#33,#51,#83]", - "[#34,#52,#84]", - ], - -0.3808, - 5.754, - ], - "Me2": [ - [ - "[#21,#22,#23,#24,#25,#26,#27,#28,#29,#30]", - "[#39,#40,#41,#42,#43,#44,#45,#46,#47,#48]", - "[#72,#73,#74,#75,#76,#77,#78,#79,#80]", - ], - -0.0025, - "nan", - ], - "Hal": [ - ["[#9,#17,#35,#53;-]", "[#53;+,+2,+3]", "[+;#3,#11,#19,#37,#55]"], - -2.996, - "nan", - ], -} - - -class TypeTable: - """Creates dictionarys from atom type and contributions table""" - - def __init__(self, table: dict) -> None: - self.smarts = {k: v[0] for k, v in table.items()} - self.logp_contributions = {k: v[1] for k, v in table.items()} - self.mr_contributions = {k: v[2] for k, v in table.items()} - - -TABLE = TypeTable(atom_types_table) - - -def calculate_mr(types_counter: Dict[str, int], table: TypeTable) -> float: - """MR counter - - Args: - types_counter: counted matches of atom types - table: - Returns: - float: MR value - """ - values = [] - for t, n in types_counter.items(): - values.append(table.mr_contributions[t] * n) - return round(sum(values), 2) - - -def calculate_logp(types_counter: Dict[str, int], table: TypeTable) -> float: - """LogP counter - - Args: - types_counter: counted matches of atom types - table: - Returns: - float: logP value - """ - values = [] - for t, n in types_counter.items(): - values.append(table.logp_contributions[t] * n) - return round(sum(values), 2) - - -def get_matches( - m: "IndigoObject", table: TypeTable -) -> Optional[Dict[str, int]]: - """Uses substructure matcher object to find matches - - Args: - m: molecule with explicit hydrogens - table: - Returns: - dict: counted matches of atom types - """ 
- types_counter: Dict[str, int] = Counter() - matcher = m.dispatcher.substructureMatcher(m) - atoms = set() - for atom_type, smarts in table.smarts.items(): - for i in smarts: - query = m.dispatcher.loadSmarts(i) - for match in matcher.iterateMatches(query): - index = match.mapAtom(query.getAtom(0)).index() - if index not in atoms: - atoms.add(index) - types_counter[atom_type] += 1 - - if not m.countAtoms() == sum(types_counter.values()): - return None - - return types_counter - - -def get_logp(mol: "IndigoObject", table: TypeTable = TABLE) -> float: - """Returns logP value for a given molecule - - Args: - mol (IndigoObject): molecule - table: list of table rows - Returns: - float: calculated logP - """ - m = mol.clone() - m.unfoldHydrogens() - types = get_matches(m, table) - if not types: - return 0.0 - return calculate_logp(types, table) - - -def get_mr(mol: "IndigoObject", table: TypeTable = TABLE) -> float: - """Returns molar refractivity value for a given molecule - - Args: - mol (IndigoObject): molecule - table: list of table rows - Returns: - float: calculated MR - """ - m = mol.clone() - m.unfoldHydrogens() - types = get_matches(m, table) - if not types: - return 0.0 - return calculate_mr(types, table) - - -def load_table_from_csv(file: str) -> TypeTable: - """Transforms csv file into input for logP and MR calculation - - Args: - file: csv file name - Returns: - instance of TypeTable object - """ - custom_table = defaultdict(list) - with open(file, "r", encoding="utf-8") as f: - fields = ["type", "SMARTS", "logP", "MR"] - reader = csv.DictReader(f, fields, delimiter=";") - for row in reader: - custom_table[row["type"]].append(row["SMARTS"]) - custom_table[row["type"]].append(row["logP"]) - custom_table[row["type"]].append(row["MR"]) - - table_obj = TypeTable(custom_table) - return table_obj diff --git a/api/python/tests/test_indigo.py b/api/python/tests/test_indigo.py index 216f7e7e41..4cbf7f0642 100644 --- a/api/python/tests/test_indigo.py +++ 
b/api/python/tests/test_indigo.py @@ -10,24 +10,6 @@ def test_aromatize_smiles(self) -> None: m.aromatize() self.assertEqual("c1ccccc1", m.smiles()) - def test_logp_value(self) -> None: - m1 = self.indigo.loadMolecule("c1ccccc1") - m2 = self.indigo.loadMolecule("CU") - m3 = self.indigo.loadMolecule("CSc1ccc2Sc3ccccc3N(CCC4CCCCN4C)c2c1") - m4 = self.indigo.loadMolecule("Nc1ccccc1") - self.assertEqual(m1.logP(), 1.69) - self.assertEqual(m2.logP(), 0.0) - self.assertEqual(m3.logP(), 5.89) - self.assertEqual(m4.logP(), 1.27) - - def test_mr_value(self) -> None: - m1 = self.indigo.loadMolecule("c1ccccc1") - m2 = self.indigo.loadMolecule("CU") - m3 = self.indigo.loadMolecule("Clc1ccccc1") - self.assertEqual(m1.molarRefractivity(), 26.44) - self.assertEqual(m2.molarRefractivity(), 0.0) - self.assertEqual(m3.molarRefractivity(), 31.45) - def test_check_salt_monovalent_monoatomic_cation(self) -> None: m1 = self.indigo.loadMolecule("[Na+].C") m2 = self.indigo.loadMolecule("[Rb+].C") diff --git a/api/tests/integration/ref/calc/crippen.py.out b/api/tests/integration/ref/calc/crippen.py.out new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration/tests/calc/crippen.py b/api/tests/integration/tests/calc/crippen.py new file mode 100644 index 0000000000..f9bdda1548 --- /dev/null +++ b/api/tests/integration/tests/calc/crippen.py @@ -0,0 +1,37 @@ +import sys + +sys.path.append("../../common") +from env_indigo import * + + +def check_float(method, smiles, expected, delta=1e-2): + global indigo + m = indigo.loadMolecule(smiles) + actual = getattr(m, method)() + if abs(actual - expected) > delta: + print(m, method, actual, "!=", expected) + + +def test_clogp(): + check_float("cLogP", "c1ccccc1", 1.68) + check_float("cLogP", "C[U]", 0.5838) + check_float("cLogP", "CSc1ccc2Sc3ccccc3N(CCC4CCCCN4C)c2c1", 5.8856) + check_float("cLogP", "Nc1ccccc1", 1.2688) + check_float("cLogP", "CN1C=NC2=C1C(=O)N(C(=O)N2C)C", 0.06) + check_float( + "cLogP", + 
"C1=CC=NC(=C1)NS(=O)(=O)C2=CC=C(C=C2)N=NC3=CC(=C(C=C3)O)C(=O)O", + 3.7, + ) + + +def test_cmr(): + check_float("cMolarRefractivity", "c1ccccc1", 26.442) + check_float("cMolarRefractivity", "C[U]", 5.86) + check_float("cMolarRefractivity", "Clc1ccccc1", 31.45) + + +if __name__ == "__main__": + indigo = Indigo() + test_clogp() + test_cmr() diff --git a/core/indigo-core/molecule/crippen.h b/core/indigo-core/molecule/crippen.h new file mode 100644 index 0000000000..2e68c3fc8a --- /dev/null +++ b/core/indigo-core/molecule/crippen.h @@ -0,0 +1,31 @@ +/**************************************************************************** + * Copyright (C) from 2009 to Present EPAM Systems. + * + * This file is part of Indigo toolkit. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************/ + +#pragma once + +namespace indigo +{ + class Molecule; + + class Crippen + { + public: + static double logP(Molecule& molecule); + static double molarRefractivity(Molecule& molecule); + }; +} diff --git a/core/indigo-core/molecule/src/crippen.cpp b/core/indigo-core/molecule/src/crippen.cpp new file mode 100644 index 0000000000..3da90cb0ea --- /dev/null +++ b/core/indigo-core/molecule/src/crippen.cpp @@ -0,0 +1,381 @@ +/**************************************************************************** + * Copyright (C) from 2009 to Present EPAM Systems. + * + * This file is part of Indigo toolkit. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************/ + +#include "molecule/crippen.h" + +#include "base_cpp/scanner.h" +#include "molecule/molecule.h" +#include "molecule/molecule_substructure_matcher.h" +#include "molecule/smiles_loader.h" + +using namespace std; + +namespace +{ + using namespace indigo; + + const vector>>& getQueries() + { + // clang-format off + static vector>> queries{ + {"C1", {"[CH4]", "[CH3]C", "[CH2](C)C"}}, + {"C2", {"[CH](C)(C)C", "[C](C)(C)(C)C"}}, + {"C3", {"[CH3][N,O,P,S,F,Cl,Br,I]", "[CH2X4]([N,O,P,S,F,Cl,Br,I])[A;!#1]"}}, + {"C4", {"[CH1X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])[A;!#1]", "[CH0X4]([N,O,P,S,F,Cl,Br,I])([A;!#1])([A;!#1])[A;!#1]"}}, + {"C5", {"[C]=[!C;A;!#1]"}}, + {"C6", {"[CH2]=C", "[CH1](=C)[A;!#1]", "[CH0](=C)([A;!#1])[A;!#1]", "[C](=C)=C"}}, + {"C7", {"[CX2]#[A;!#1]"}}, + {"C8", {"[CH3]c"}}, + {"C9", {"[CH3]a"}}, {"C10", {"[CH2X4]a"}}, + {"C11", {"[CHX4]a"}}, + {"C12", {"[CH0X4]a"}}, + {"C13", {"[cH0]-[A;!C;!N;!O;!S;!F;!Cl;!Br;!I;!#1]"}}, + {"C14", {"[c][#9]"}}, + {"C15", {"[c][#17]"}}, + {"C16", {"[c][#35]"}}, + {"C17", {"[c][#53]"}}, + {"C18", {"[cH]"}}, + {"C19", {"[c](:a)(:a):a"}}, + {"C20", {"[c](:a)(:a)-a"}}, + {"C21", {"[c](:a)(:a)-C"}}, + {"C22", {"[c](:a)(:a)-N"}}, + {"C23", {"[c](:a)(:a)-O"}}, + {"C24", {"[c](:a)(:a)-S"}}, + {"C25", {"[c](:a)(:a)=[C,N,O]"}}, + {"C26", {"[C](=C)(a)[A;!#1]", "[C](=C)(c)a", "[CH1](=C)a", "[C]=c"}}, + {"C27", 
{"[CX4][A;!C;!N;!O;!P;!S;!F;!Cl;!Br;!I;!#1]"}}, + {"C", {"[#6]"}}, + {"H1", {"[#1][#6,#1]"}}, + {"H2", {"[#1]O[CX4,c]", "[#1]O[!#6;!#7;!#8;!#16]", "[#1][!#6;!#7;!#8]"}}, + {"H3", {"[#1][#7]", "[#1]O[#7]"}}, + {"H4", {"[#1]OC=[#6,#7,O,S]", "[#1]O[O,S]"}}, + {"H", {"[#1]"}}, + {"N1", {"[NH2+0][A;!#1]"}}, + {"N2", {"[NH+0]([A;!#1])[A;!#1]"}}, + {"N3", {"[NH2+0]a"}}, + {"N4", {"[NH1+0]([!#1;A,a])a"}}, + {"N5", {"[NH+0]=[!#1;A,a]"}}, + {"N6", {"[N+0](=[!#1;A,a])[!#1;A,a]"}}, + {"N7", {"[N+0]([A;!#1])([A;!#1])[A;!#1]"}}, + {"N8", {"[N+0](a)([!#1;A,a])[A;!#1]", "[N+0](a)(a)a"}}, + {"N9", {"[N+0]#[A;!#1]"}}, + {"N10", {"[NH3,NH2,NH;+,+2,+3]"}}, + {"N11", {"[n+0]"}}, + {"N12", {"[n;+,+2,+3]"}}, + {"N13", {"[NH0;+,+2,+3]([A;!#1])([A;!#1])([A;!#1])[A;!#1]", "[NH0;+,+2,+3](=[A;!#1])([A;!#1])[!#1;A,a]", "[NH0;+,+2,+3](=[#6])=[#7]"}}, + {"N14", {"[N;+,+2,+3]#[A;!#1]", "[N;-,-2,-3]", "[N;+,+2,+3](=[N;-,-2,-3])=N"}}, + {"N", {"[#7]"}}, + {"O1", {"[o]"}}, + {"O2", {"[OH,OH2]"}}, + {"O3", {"[O]([A;!#1])[A;!#1]"}}, + {"O4", {"[O](a)[!#1;A,a]"}}, + {"O5", {"[O]=[#7,#8]", "[OX1;-,-2,-3][#7]"}}, + {"O6", {"[OX1;-,-2,-2][#16]", "[O;-0]=[#16;-0]"}}, + {"O12", {"[O-]C(=O)"}}, + {"O7", {"[OX1;-,-2,-3][!#1;!N;!S]"}}, + {"O8", {"[O]=c"}}, + {"O9", {"[O]=[CH]C", "[O]=C(C)([A;!#1])", "[O]=[CH][N,O]", "[O]=[CH2]", "[O]=[CX2]=O"}}, + {"O10", {"[O]=[CH]c", "[O]=C([C,c])[a;!#1]", "[O]=C(c)[A;!#1]"}}, + {"O11", {"[O]=C([!#1;!#6])[!#1;!#6]"}}, + {"O", {"[#8]"}}, + {"F", {"[#9-0]"}}, + {"Cl", {"[#17-0]"}}, + {"Br", {"[#35-0]"}}, + {"I", {"[#53-0]"}}, + {"F2", {"[#9-*]"}}, + {"Cl2", {"[#17-*]"}}, + {"Br2", {"[#35-*]"}}, + {"I2", {"[#53-*]", "[#53+*]"}}, + {"P", {"[#15]"}}, + {"S2", {"[S;-,-2,-3,-4,+1,+2,+3,+5,+6]", "[S-0]=[N,O,P,S]"}}, + {"S1", {"[S;A]"}}, + {"S3", {"[s;a]"}}, + {"Me1", {"[#3,#11,#19,#37,#55]", "[#4,#12,#20,#38,#56]", "[#5,#13,#31,#49,#81]", "[#14,#32,#50,#82]", "[#33,#51,#83]", "[#34,#52,#84]"}}, + {"Me2", {"[#21,#22,#23,#24,#25,#26,#27,#28,#29,#30]", 
"[#39,#40,#41,#42,#43,#44,#45,#46,#47,#48]", "[#72,#73,#74,#75,#76,#77,#78,#79,#80]"}}, + {"Hal", {"[#9,#17,#35,#53;-]", "[#53;+,+2,+3]", "[+;#3,#11,#19,#37,#55]"}}, + }; + // clang-format on + return queries; + } + + const unordered_map& getLogPContributions() + { + // clang-format off + static unordered_map contributions{ + {"C1", 0.1441}, + {"C2", 0.0}, + {"C3", -0.2035}, + {"C4", -0.2051}, + {"C5", -0.2783}, + {"C6", 0.1551}, + {"C7", 0.0017}, + {"C8", 0.08452}, + {"C9", -0.1444}, + {"C10",-0.0516}, + {"C11", 0.1193}, + {"C12", -0.0967}, + {"C13", -0.5443}, + {"C14", 0.0}, + {"C15", 0.245}, + {"C16", 0.198}, + {"C17", 0.0}, + {"C18", 0.1581}, + {"C19", 0.2955}, + {"C20", 0.2713}, + {"C21", 0.136}, + {"C22", 0.4619}, + {"C23", 0.5437}, + {"C24", 0.1893}, + {"C25", -0.8186}, + {"C26", 0.264}, + {"C27", 0.2148}, + {"C", 0.08129}, + {"H1", 0.123}, + {"H2", -0.2677}, + {"H3", 0.2142}, + {"H4", 0.298}, + {"H", 0.1125}, + {"N1", -1.019}, + {"N2", -0.7096}, + {"N3", -1.027}, + {"N4", -0.5188}, + {"N5", 0.08387}, + {"N6", 0.1836}, + {"N7", -0.3187}, + {"N8", -0.4458}, + {"N9", 0.01508}, + {"N10", -1.95}, + {"N11", -0.3239}, + {"N12", -1.119}, + {"N13", -0.3396}, + {"N14", 0.2887}, + {"N", -0.4806}, + {"O1", 0.1552}, + {"O2", -0.2893}, + {"O3", -0.0684}, + {"O4", -0.4195}, + {"O5", 0.0335}, + {"O6", -0.3339}, + {"O7", -1.189}, + {"O8", 0.1788}, + {"O9", -0.1526}, + {"O10", 0.1129}, + {"O11", 0.4833}, + {"O12", -1.326}, + {"O", -0.1188}, + {"F2", -2.996}, + {"F", 0.4202}, + {"Cl2", -2.996}, + {"Cl", 0.6895}, + {"Br2", -2.996}, + {"Br", 0.8456}, + {"I2", -2.996}, + {"I", 0.8857}, + {"P", 0.8612}, + {"S1", 0.6482}, + {"S2", -0.0024}, + {"S3", 0.6237}, + {"Me1", -0.3808}, + {"Me2", -0.0025}, + {"Hal", -2.996} + }; + // clang-format on + return contributions; + } + + QueryMolecule& loadSmarts(const std::string& smarts) + { + thread_local unordered_map smartsHolder; + if (smartsHolder.count(smarts) == 0) + { + BufferScanner scanner(smarts.c_str()); + SmilesLoader 
loader(scanner); + loader.loadSMARTS(smartsHolder[smarts]); + } + return smartsHolder.at(smarts); + } + + const unordered_map& getMRContributions() + { + // clang-format off + static unordered_map contributions{ + {"C1", 2.503}, + {"C2", 2.433}, + {"C3", 2.753}, + {"C4", 2.731}, + {"C5", 5.007}, + {"C6", 3.513}, + {"C7", 3.888}, + {"C8", 2.464}, + {"C9", 2.412}, + {"C10", 2.488}, + {"C11", 2.582}, + {"C12", 2.576}, + {"C13", 4.041}, + {"C14", 3.257}, + {"C15", 3.564}, + {"C16", 3.18}, + {"C17", 3.104}, + {"C18", 3.35}, + {"C19", 4.346}, + {"C20", 3.904}, + {"C21", 3.509}, + {"C22", 4.067}, + {"C23", 3.853}, + {"C24", 2.673}, + {"C25", 3.135}, + {"C26", 4.305}, + {"C27", 2.693}, + {"C", 3.243}, + {"H1", 1.057}, + {"H2", 1.395}, + {"H3", 0.9627}, + {"H4", 1.805}, + {"H", 1.112}, + {"N1", 2.262}, + {"N2", 2.173}, + {"N3", 2.827}, + {"N4", 3.0}, + {"N5", 1.757}, + {"N6", 2.428}, + {"N7", 1.839}, + {"N8", 2.819}, + {"N9", 1.725}, + {"N10", std::nanf("")}, + {"N11", 2.202}, + {"N12", std::nanf("")}, + {"N13", 0.2604}, + {"N14", 3.359}, + {"N", 2.134}, + {"O1", 1.08}, + {"O2", 0.8238}, + {"O3", 1.085}, + {"O4", 1.182}, + {"O5", 3.367}, + {"O6", 0.7774}, + {"O7", 0.0}, + {"O8", 3.135}, + {"O9", 0.0}, + {"O10", 0.2215}, + {"O11", 0.389}, + {"O12", std::nanf("")}, + {"O", 0.6865}, + {"F2", std::nanf("")}, + {"F", 1.108}, + {"Cl2", std::nanf("")}, + {"Cl", 5.853}, + {"Br2", std::nanf("")}, + {"Br", 8.927}, + {"I2", std::nanf("")}, + {"I", 14.02}, + {"P", 6.92}, + {"S1", 7.591}, + {"S2", 7.365}, + {"S3", 6.691}, + {"Me1", 5.754}, + {"Me2", std::nanf("")}, + {"Hal", std::nanf("")} + }; + // clang-format on + return contributions; + } + + unordered_map calculateMatches(Molecule& rawMolecule) + { + Molecule molecule; + molecule.clone(rawMolecule); + if (!molecule.isAromatized()) + { + molecule.aromatize(AromaticityOptions()); + } + Array markers; + molecule.unfoldHydrogens(&markers, -1); + unordered_set ignoredAtoms; + ignoredAtoms.reserve(molecule.vertexCount()); + Array 
mapping; + + unordered_map result; + for (const auto& query : getQueries()) + { + const auto& queryClass = query.first; + const auto& querySmartsVector = query.second; + int matches = 0; + for (const auto& querySmarts : querySmartsVector) + { + auto& queryMolecule = loadSmarts(querySmarts); + MoleculeSubstructureMatcher matcher(molecule); + matcher.use_aromaticity_matcher = true; + matcher.disable_folding_query_h = true; + matcher.restore_unfolded_h = false; + matcher.find_unique_embeddings = true; + matcher.save_for_iteration = true; + matcher.setQuery(queryMolecule); + + for (bool flag = matcher.find(); flag; flag = matcher.findNext()) + { + mapping.clear(); + mapping.copy(matcher.getQueryMapping(), queryMolecule.vertexEnd()); + const auto index = mapping[0]; + if (index > -1 && !ignoredAtoms.count(index)) + { + ignoredAtoms.insert(index); + ++matches; + } + } + + if (ignoredAtoms.size() == molecule.vertexCount()) + { + break; + } + } + if (matches) + { + result[queryClass] = matches; + } + } + + return result; + } +} + +namespace indigo +{ + double Crippen::logP(Molecule& molecule) + { + const auto& matches = calculateMatches(molecule); + const auto& logPContributions = getLogPContributions(); + double logP = 0.0; + for (const auto& match : matches) + { + logP += logPContributions.at(match.first) * match.second; + } + return logP; + } + + double Crippen::molarRefractivity(Molecule& molecule) + { + const auto& matches = calculateMatches(molecule); + const auto& contributions = getMRContributions(); + double mr = 0.0; + for (const auto& match : matches) + { + mr += contributions.at(match.first) * match.second; + } + return mr; + } +} diff --git a/core/indigo-core/tests/common.h.in b/core/indigo-core/tests/common.h.in index 011ef25dda..96ebc10b66 100644 --- a/core/indigo-core/tests/common.h.in +++ b/core/indigo-core/tests/common.h.in @@ -6,6 +6,11 @@ #include +#define METHANE "C" +#define BENZENE "C1=CC=CC=C1" +#define CAFFEINE "CN1C=NC2=C1C(=O)N(C(=O)N2C)C" 
+#define SULFASALAZINE "C1=CC=NC(=C1)NS(=O)(=O)C2=CC=C(C=C2)N=NC3=CC(=C(C=C3)O)C(=O)O" + namespace indigo { class IndigoCoreTest : public ::testing::Test diff --git a/core/indigo-core/tests/tests/molecule.cpp b/core/indigo-core/tests/tests/molecule.cpp index 2cc4c7fba5..4e9354b205 100644 --- a/core/indigo-core/tests/tests/molecule.cpp +++ b/core/indigo-core/tests/tests/molecule.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -120,3 +121,49 @@ TEST_F(IndigoCoreMoleculeTest, numHydrogenBondDonors) loadMolecule("COP(=O)(OC)OC(=CCl)C1=CC(=C(C=C1Cl)Cl)Cl", molecule); EXPECT_EQ(0, Lipinski::getNumHydrogenBondDonors(molecule)); } + +TEST_F(IndigoCoreMoleculeTest, cLogP) +{ + Molecule molecule; + { + loadMolecule(METHANE, molecule); + EXPECT_NEAR(0.6361, Crippen::logP(molecule), 0.01); + } + { + loadMolecule("C[U]", molecule); + EXPECT_NEAR(0.5838, Crippen::logP(molecule), 0.01); + } + { + loadMolecule(BENZENE, molecule); + EXPECT_NEAR(1.6865, Crippen::logP(molecule), 0.01); + } + { + loadMolecule(CAFFEINE, molecule); + EXPECT_NEAR(0.06, Crippen::logP(molecule), 0.01); + } + { + loadMolecule(SULFASALAZINE, molecule); + EXPECT_NEAR(3.7, Crippen::logP(molecule), 0.01); + } +} + +TEST_F(IndigoCoreMoleculeTest, cMolarRefractivity) +{ + Molecule molecule; + { + loadMolecule(METHANE, molecule); + EXPECT_NEAR(6.731, Crippen::molarRefractivity(molecule), 0.01); + } + { + loadMolecule(BENZENE, molecule); + EXPECT_NEAR(26.442, Crippen::molarRefractivity(molecule), 0.01); + } + { + loadMolecule(CAFFEINE, molecule); + EXPECT_NEAR(49.1, Crippen::molarRefractivity(molecule), 0.01); + } + { + loadMolecule(SULFASALAZINE, molecule); + EXPECT_NEAR(100.73, Crippen::molarRefractivity(molecule), 0.01); + } +}