diff --git a/.github/workflows/indigo-ci.yaml b/.github/workflows/indigo-ci.yaml index af3c6f312e..9eb365e23a 100644 --- a/.github/workflows/indigo-ci.yaml +++ b/.github/workflows/indigo-ci.yaml @@ -738,7 +738,7 @@ jobs: with: java-version: ${{ matrix.java-version }} - name: Cache Maven packages - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} diff --git a/api/wasm/indigo-ketcher/indigo-ketcher.cpp b/api/wasm/indigo-ketcher/indigo-ketcher.cpp index a8bd71d84f..821407e67f 100644 --- a/api/wasm/indigo-ketcher/indigo-ketcher.cpp +++ b/api/wasm/indigo-ketcher/indigo-ketcher.cpp @@ -275,7 +275,7 @@ namespace indigo void indigoSetOptions(const std::map& options) { - std::set to_skip{"smiles", "smarts", "input-format", "output-content-type", "monomerLibrary"}; + std::set to_skip{"smiles", "smarts", "input-format", "output-content-type", "monomerLibrary", "sequence-type"}; for (const auto& option : options) { if (to_skip.count(option.first) < 1) @@ -454,13 +454,41 @@ namespace indigo if (library >= 0) { + auto sequence_type = options.find("sequence-type"); + if (sequence_type != options.end() && sequence_type->second == "PEPTIDE") + { + print_js("try as PEPTIDE-3-LETTER"); + objectId = indigoLoadSequenceFromString(data.c_str(), "PEPTIDE-3-LETTER", library); + if (objectId >= 0) + { + return IndigoKetcherObject(objectId, IndigoKetcherObject::EKETDocument); + } + } print_js("try as IDT"); objectId = indigoLoadIdtFromString(data.c_str(), library); if (objectId >= 0) { return IndigoKetcherObject(objectId, IndigoKetcherObject::EKETDocument); } - + if (sequence_type != options.end()) + { + std::string msg = "try as " + sequence_type->second; + print_js(msg.c_str()); + objectId = indigoLoadSequenceFromString(data.c_str(), sequence_type->second.c_str(), library); + if (objectId >= 0) + { + return IndigoKetcherObject(objectId, IndigoKetcherObject::EKETDocument); + } + } + else + { + print_js("try as PEPTIDE-3-LETTER"); + objectId = indigoLoadSequenceFromString(data.c_str(), "PEPTIDE-3-LETTER", library); + if (objectId >= 0) + { + return IndigoKetcherObject(objectId, IndigoKetcherObject::EKETDocument); + } + } print_js("try as HELM"); objectId = indigoLoadHelmFromString(data.c_str(), library); if (objectId >= 0) diff --git a/api/wasm/indigo-ketcher/test/test.js b/api/wasm/indigo-ketcher/test/test.js index 1bfa11bc0d..443ccbb158 100644 --- a/api/wasm/indigo-ketcher/test/test.js +++ b/api/wasm/indigo-ketcher/test/test.js @@ -836,6 +836,14 @@ M END const peptide_seq_ref1 = fs.readFileSync("peptide_ref.seq"); assert.equal(peptide_seq, peptide_seq_ref1.toString()); options.delete(); + // test autodetect + let ad_options = new indigo.MapStringString(); + ad_options.set("output-content-type", "application/json"); + ad_options.set("monomerLibrary", monomersLib); + ad_options.set("sequence-type", "PEPTIDE"); + const res2 = indigo.convert(peptide_seq_ref, "ket", ad_options); + assert.equal(res2, peptide_ket_ref.toString()); + ad_options.delete(); }); } @@ -861,6 +869,16 @@ M END const peptide_seq1 = indigo.convert(peptide_ket_ref.toString(), "peptide-sequence-3-letter", options); assert.equal(peptide_seq1, peptide_seq_ref); options.delete(); + // test autodetect + let ad_options = new indigo.MapStringString(); + ad_options.set("output-content-type", "application/json"); + ad_options.set("monomerLibrary", monomersLib); + const res2 = JSON.parse(indigo.convert(peptide_seq_ref, "peptide-sequence-3-letter", ad_options)).struct; + assert.equal(res2, peptide_seq_ref); + ad_options.set("sequence-type", "PEPTIDE"); + const res3 = JSON.parse(indigo.convert(peptide_seq_ref, "peptide-sequence-3-letter", ad_options)).struct; + assert.equal(res3, peptide_seq_ref); + ad_options.delete(); }); } @@ -883,6 +901,14 @@ M END const rna_seq_ref1 = fs.readFileSync("rna_ref.seq"); assert.equal(rna_seq, rna_seq_ref1.toString()); options.delete(); + // test autodetect + let ad_options = new indigo.MapStringString(); + ad_options.set("output-content-type", "application/json"); + ad_options.set("monomerLibrary", monomersLib); + ad_options.set("sequence-type", "RNA"); + const res2 = indigo.convert(rna_seq_ref, "sequence", ad_options); + assert.equal(res2, rna_seq_ref1.toString()); + ad_options.delete(); }); } @@ -906,6 +932,14 @@ M END const dna_seq_ref1 = fs.readFileSync("dna_ref.seq"); assert.equal(dna_seq, dna_seq_ref1.toString()); options.delete(); + // test autodetect + let ad_options = new indigo.MapStringString(); + ad_options.set("output-content-type", "application/json"); + ad_options.set("monomerLibrary", monomersLib); + ad_options.set("sequence-type", "DNA"); + const res2 = indigo.convert(dna_seq_ref, "sequence", ad_options); + assert.equal(res2, dna_seq_ref1.toString()); + ad_options.delete(); }); } diff --git a/core/indigo-core/common/base_cpp/profiling.h b/core/indigo-core/common/base_cpp/profiling.h index c8f9cdfcc8..e187eab6ba 100644 --- a/core/indigo-core/common/base_cpp/profiling.h +++ b/core/indigo-core/common/base_cpp/profiling.h @@ -24,6 +24,7 @@ #endif #include +#include #include #include "base_cpp/array.h" diff --git a/core/indigo-core/reaction/src/query_reaction.cpp b/core/indigo-core/reaction/src/query_reaction.cpp index d4d6e7fb81..df9bc4287e 100644 --- a/core/indigo-core/reaction/src/query_reaction.cpp +++ b/core/indigo-core/reaction/src/query_reaction.cpp @@ -182,7 +182,7 @@ int QueryReaction::_addBaseMolecule(int side) std::unique_ptr QueryReaction::getBaseReaction(int index) { - std::unique_ptr query_reaction; + std::unique_ptr query_reaction(neu()); query_reaction->clone(*this); return query_reaction; } diff --git a/utils/indigo-service/backend/service/tests/api/indigo_test.py b/utils/indigo-service/backend/service/tests/api/indigo_test.py index 530ac20498..e1a30d50cd 100644 --- a/utils/indigo-service/backend/service/tests/api/indigo_test.py +++ b/utils/indigo-service/backend/service/tests/api/indigo_test.py @@ -3213,9 +3213,11 @@ def test_convert_sequences(self): with open(lib_path, "r") as file: monomer_library = file.read() + monomer_struct = "ACGTU" + headers, data = self.get_headers( { - "struct": "ACGTU", + "struct": monomer_struct, "options": {"monomerLibrary": monomer_library}, "input_format": "chemical/x-rna-sequence", "output_format": "chemical/x-indigo-ket", @@ -3227,7 +3229,7 @@ def test_convert_sequences(self): headers, data = self.get_headers( { - "struct": "ACGTU", + "struct": monomer_struct, "options": {"monomerLibrary": monomer_library}, "input_format": "chemical/x-rna-sequence", "output_format": "chemical/x-sequence", @@ -3238,11 +3240,31 @@ def test_convert_sequences(self): self.url_prefix + "/convert", headers=headers, data=data ) - self.assertEqual(json.loads(result_rna_1.text)["struct"], "ACGTU") + self.assertEqual( + json.loads(result_rna_1.text)["struct"], monomer_struct + ) + # test autodetect RNA headers, data = self.get_headers( { "struct": "ACGTU", + "options": { + "monomerLibrary": monomer_library, + "sequence-type": "RNA", + }, + "output_format": "chemical/x-sequence", + } + ) + result_rna_ad = requests.post( + self.url_prefix + "/convert", headers=headers, data=data + ) + self.assertEqual( + json.loads(result_rna_ad.text)["struct"], monomer_struct + ) + + headers, data = self.get_headers( + { + "struct": monomer_struct, "options": {"monomerLibrary": monomer_library}, "input_format": "chemical/x-dna-sequence", "output_format": "chemical/x-indigo-ket", @@ -3254,7 +3276,7 @@ def test_convert_sequences(self): headers, data = self.get_headers( { - "struct": "ACGTU", + "struct": monomer_struct, "options": {"monomerLibrary": monomer_library}, "input_format": "chemical/x-dna-sequence", "output_format": "chemical/x-sequence", @@ -3264,11 +3286,31 @@ def test_convert_sequences(self): self.url_prefix + "/convert", headers=headers, data=data ) - self.assertEqual(json.loads(result_dna_1.text)["struct"], "ACGTU") + self.assertEqual( + json.loads(result_dna_1.text)["struct"], monomer_struct + ) + # test autodetect DNA headers, data = self.get_headers( { - "struct": "ACGTU", + "struct": monomer_struct, + "options": { + "monomerLibrary": monomer_library, + "sequence-type": "DNA", + }, + "output_format": "chemical/x-sequence", + } + ) + result_dna_ad = requests.post( + self.url_prefix + "/convert", headers=headers, data=data + ) + self.assertEqual( + json.loads(result_dna_ad.text)["struct"], monomer_struct + ) + + headers, data = self.get_headers( + { + "struct": monomer_struct, "options": {"monomerLibrary": monomer_library}, "input_format": "chemical/x-peptide-sequence", "output_format": "chemical/x-indigo-ket", @@ -3278,6 +3320,24 @@ def test_convert_sequences(self): self.url_prefix + "/convert", headers=headers, data=data ) + # test autodetect PEPTIDE + headers, data = self.get_headers( + { + "struct": monomer_struct, + "options": { + "monomerLibrary": monomer_library, + "sequence-type": "PEPTIDE", + }, + "output_format": "chemical/x-sequence", + } + ) + result_peptide_ad = requests.post( + self.url_prefix + "/convert", headers=headers, data=data + ) + self.assertEqual( + json.loads(result_peptide_ad.text)["struct"], monomer_struct + ) + peptide_3 = "AlaCysGlyThrSec" headers, data = self.get_headers( { @@ -3291,6 +3351,39 @@ def test_convert_sequences(self): self.url_prefix + "/convert", headers=headers, data=data ) + # test autodetect PEPTIDE-3-LETTER + headers, data = self.get_headers( + { + "struct": peptide_3, + "options": { + "monomerLibrary": monomer_library, + "sequence-type": "PEPTIDE", + }, + "output_format": "chemical/x-sequence", + } + ) + result_peptide_3_ad = requests.post( + self.url_prefix + "/convert", headers=headers, data=data + ) + self.assertEqual( + json.loads(result_peptide_3_ad.text)["struct"], monomer_struct + ) + + headers, data = self.get_headers( + { + "struct": peptide_3, + "options": {"monomerLibrary": monomer_library}, + "output_format": "chemical/x-sequence", + } + ) + result_peptide_3_ad_no_type = requests.post( + self.url_prefix + "/convert", headers=headers, data=data + ) + self.assertEqual( + json.loads(result_peptide_3_ad_no_type.text)["struct"], + monomer_struct, + ) + headers, data = self.get_headers( { "struct": json.loads(result_ket_3.text)["struct"], diff --git a/utils/indigo-service/backend/service/v2/indigo_api.py b/utils/indigo-service/backend/service/v2/indigo_api.py index c174a7da9e..35e2235bd4 100644 --- a/utils/indigo-service/backend/service/v2/indigo_api.py +++ b/utils/indigo-service/backend/service/v2/indigo_api.py @@ -79,6 +79,7 @@ def indigo_init(options={}): "input-format", "output-content-type", "monomerLibrary", + "sequence-type", ): continue tls.indigo.setOption(option, value) @@ -306,6 +307,52 @@ def remove_unselected_repeating_units_r(r, selected): remove_unselected_repeating_units_m(m, moleculeAtoms) +def try_load_macromol(indigo, md, molstr, library, options): + sequence_type = options.get("sequence-type") + if sequence_type == "PEPTIDE": + try: + md.struct = indigo.loadSequence( + molstr, "PEPTIDE-3-LETTER", library + ) + md.is_rxn = False + md.is_query = False + return + except IndigoException: + pass + try: + md.struct = indigo.loadIdt(molstr, library) + md.is_rxn = False + md.is_query = False + return + except IndigoException: + pass + if sequence_type is not None: + try: + md.struct = indigo.loadSequence(molstr, sequence_type, library) + md.is_rxn = False + md.is_query = False + return + except IndigoException: + pass + else: + try: + md.struct = indigo.loadSequence( + molstr, "PEPTIDE-3-LETTER", library + ) + md.is_rxn = False + md.is_query = False + return + except IndigoException: + pass + try: + md.struct = indigo.loadHelm(molstr, library) + except IndigoException: + raise HttpException( + "struct data not recognized as molecule, query, reaction or reaction query", + 400, + ) + + def load_moldata( molstr, indigo=None, @@ -408,21 +455,10 @@ def load_moldata( "struct data not recognized as molecule, query, reaction or reaction query", 400, ) - else: # has library try to load IDT and HELM - try: - md.struct = indigo.loadIdt(molstr, library) - md.is_rxn = False - except IndigoException: - try: - md.struct = indigo.loadHelm( - molstr, library - ) - except IndigoException: - raise HttpException( - "struct data not recognized as molecule, query, reaction or reaction query", - 400, - ) - + else: # has library try to load macromolecule + try_load_macromol( + indigo, md, molstr, library, options + ) return md