Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1900 Export of modified RNA to IDT notation (modified IDT monomers) #1945

Merged
merged 8 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions api/tests/integration/ref/formats/idt_to_ket.py.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,31 @@
*** IDT to KET ***
idt_a.ket:SUCCEED
idt_acg.ket:SUCCEED
idt_maxmgc.ket:SUCCEED
idt_2moera.ket:SUCCEED
idt_52moera_with_3phos.ket:SUCCEED
idt_bases.ket:SUCCEED
idt_many_molecules.ket:SUCCEED
idt_mixed.ket:SUCCEED
idt_mod_phosphates.ket:SUCCEED
idt_modifications.ket:SUCCEED
Got expected error 'SEQUENCE loader: SequenceLoader::loadIdt(), Invalid symbols in the sequence: !,-,$,#,1,2,w,1,2,2,3,e,3,2,e,3,3'
idt_prefix_suffix.ket:SUCCEED
idt_singe_32moera_nucleoside.ket:SUCCEED
idt_single_nucleoside.ket:SUCCEED
idt_std_phosphates.ket:SUCCEED
Test '!+-$#12w12r23e32e33': got expected error 'SequenceLoader::loadIdt(), Invalid symbols in the sequence: !,-,$,#,1,2,w,1,2,2,3,e,3,2,e,3,3'
Test '+/5Phos/A': got expected error 'Sugar prefix could not be used with '/5Phos/''
Test '/': got expected error 'Unexpected end of data'
Test '//': got expected error 'Invalid modification: empty string.'
Test '/32MOErA/T': got expected error 'IDT alias 32MOErA not found at five-prime end position.'
Test '/3Phos/T': expected error 'IDT alias 3Phos not found at three-prime end position.' but got 'SEQUENCE loader: IDT alias 3Phos not found at five-prime end position.'
Test '/52MOErA/': got expected error 'IDT alias 52MOErA not found at three-prime end position.'
Test '/5Phos/*A': got expected error '/5Phos/ cannot be modified to 'sP''
Test '/a/': got expected error 'Invalid modification: a.'
Test '/i2MOErA/': got expected error 'IDT alias i2MOErA not found at three-prime end position.'
Test '/i2MOErA/T': got expected error 'IDT alias i2MOErA not found at five-prime end position.'
Test 'A*': got expected error 'Invalid IDT sequence: '*' couldn't be the last symbol.'
Test 'A+/3Phos/': got expected error 'Sugar prefix could not be used with '/3Phos/''
Test 'Am/3Phos/': got expected error 'Sugar prefix could not be used with '/3Phos/''
Test 'Ar/3Phos/': got expected error 'Sugar prefix could not be used with '/3Phos/''
Test 'T/52MOErA/': got expected error 'IDT alias 52MOErA not found at three-prime end position.'
Test 'T/5Phos/': got expected error 'IDT alias 5Phos not found at three-prime end position.'
Test 'T/i2MOErA/': got expected error 'IDT alias i2MOErA not found at three-prime end position.'
Test 'm/5Phos/A': got expected error 'Sugar prefix could not be used with '/5Phos/''
Test 'r/5Phos/A': got expected error 'Sugar prefix could not be used with '/5Phos/''
20 changes: 19 additions & 1 deletion api/tests/integration/ref/formats/ket_to_idt.py.out
Original file line number Diff line number Diff line change
@@ -1,2 +1,20 @@
*** KET to IDT ***
1654-dna-to-idt.idt:SUCCEED
idt_52moera_with_3phos.ket:SUCCEED
idt_bases.ket:SUCCEED
idt_many_molecules.ket:SUCCEED
idt_mixed.ket:SUCCEED
idt_mod_phosphates.ket:SUCCEED
idt_modifications.ket:SUCCEED
idt_prefix_suffix.ket:SUCCEED
idt_singe_32moera_nucleoside.ket:SUCCEED
idt_single_nucleoside.ket:SUCCEED
idt_std_phosphates.ket:SUCCEED
Test ket-to-idt-invalid-last-phosphate: got expected error 'Canot save molecule in IDT format - phosphate sP cannot be last monomer in sequence.'
Test ket-to-idt-invalid-nucleotide: got expected error 'IDT alias for group sugar:m2e2r base:z8c3G phosphate:mepo2 not found.'
Test ket-to-idt-invalid-posphates: got expected error 'Canot save molecule in IDT format - sugar R with too much phosphates connected P and P.'
Test ket-to-idt-invalid-sugar: got expected error 'IDT alias for sugar:m2e2r not found.'
Test ket-to-idt-invalid-sugar-base: got expected error 'IDT alias for group sugar:m2e2r base:z8c3G not found.'
Test ket-to-idt-invalid-sugar-phosphate: got expected error 'IDT alias for group sugar:m2e2r phosphate:mepo2 not found.'
Test ket-to-idt-peptide: got expected error 'Canot save molecule in IDT format - AA monomer DPhe4C cannot be first.'
Test ket-to-idt-r1r1connection: got expected error 'Canot save molecule in IDT format - sugar MOE connected to monomer MOE with class SUGAR (only base or phosphate expected).'
Test ket-to-idt-two-bases: got expected error 'Canot save molecule in IDT format - sugar R with two base connected A and C.'
69 changes: 45 additions & 24 deletions api/tests/integration/tests/formats/idt_to_ket.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,49 +28,70 @@ def find_diff(a, b):
root = joinPathPy("molecules/", __file__)
ref_path = joinPathPy("ref/", __file__)

idt_names = [
"idt_a",
"idt_acg",
"idt_maxmgc",
"idt_2moera",
"idt_modifications",
]

idt_data = {
"idt_a": "A",
"idt_acg": "ACG",
"idt_maxmgc": "mA*mGC",
"idt_2moera": "/52MOErA//i2MOErA//32MOErA/",
"idt_modifications": "/5Phos//i2MOErC//3Phos/",
"idt_single_nucleoside": "A",
"idt_bases": "ATCGUI",
"idt_prefix_suffix": "mA*rT*+C*G*+UrImA",
"idt_modifications": "/52MOErA/*/i2MOErA//32MOErA/",
"idt_52moera_with_3phos": "/52MOErA//3Phos/",
"idt_singe_32moera_nucleoside": "/32MOErA/",
"idt_std_phosphates": "/5Phos/ATG/3Phos/",
"idt_mod_phosphates": "/5Phos//i2MOErC//3Phos/",
"idt_mixed": "/5Phos/+A*/i2MOErA/*rG/3Phos/",
"idt_many_molecules": "ACTG\n/52MOErA/*AU/3Phos/\rAC/i2MOErC//3Phos/\n\rTACG",
}

lib = indigo.loadMoleculeFromFile(
os.path.join(ref_path, "monomer_library.ket")
)

for idt_name in idt_names:
mol = indigo.loadIdt(idt_data[idt_name])
# with open(os.path.join(ref_path, idt_name) + ".ket", "w") as file:
for filename in sorted(idt_data.keys()):
mol = indigo.loadIdt(idt_data[filename])
# with open(os.path.join(ref_path, filename) + ".ket", "w") as file:
# file.write(mol.json())
with open(os.path.join(ref_path, idt_name) + ".ket", "r") as file:
with open(os.path.join(ref_path, filename) + ".ket", "r") as file:
ket_ref = file.read()
ket = mol.json()
diff = find_diff(ket_ref, ket)
if not diff:
print(idt_name + ".ket:SUCCEED")
print(filename + ".ket:SUCCEED")
else:
print(idt_name + ".ket:FAILED")
print(filename + ".ket:FAILED")
print(diff)

idt_errors = {
"!+-$#12w12r23e32e33": "SEQUENCE loader: SequenceLoader::loadIdt(), Invalid symbols in the sequence: !,-,$,#,1,2,w,1,2,2,3,e,3,2,e,3,3"
"!+-$#12w12r23e32e33": "SequenceLoader::loadIdt(), Invalid symbols in the sequence: !,-,$,#,1,2,w,1,2,2,3,e,3,2,e,3,3",
"/": "Unexpected end of data",
"//": "Invalid modification: empty string.",
"/a/": "Invalid modification: a.",
"A*": "Invalid IDT sequence: '*' couldn't be the last symbol.",
"/i2MOErA/": "IDT alias i2MOErA not found at three-prime end position.",
"/i2MOErA/T": "IDT alias i2MOErA not found at five-prime end position.",
"T/i2MOErA/": "IDT alias i2MOErA not found at three-prime end position.",
"/32MOErA/T": "IDT alias 32MOErA not found at five-prime end position.",
"/52MOErA/": "IDT alias 52MOErA not found at three-prime end position.",
"T/52MOErA/": "IDT alias 52MOErA not found at three-prime end position.",
"/3Phos/T": "IDT alias 3Phos not found at three-prime end position.",
"T/5Phos/": "IDT alias 5Phos not found at three-prime end position.",
"/5Phos/*A": "/5Phos/ cannot be modified to 'sP'",
"r/5Phos/A": "Sugar prefix could not be used with '/5Phos/'",
"+/5Phos/A": "Sugar prefix could not be used with '/5Phos/'",
"m/5Phos/A": "Sugar prefix could not be used with '/5Phos/'",
"Ar/3Phos/": "Sugar prefix could not be used with '/3Phos/'",
"A+/3Phos/": "Sugar prefix could not be used with '/3Phos/'",
"Am/3Phos/": "Sugar prefix could not be used with '/3Phos/'",
}
for idt_seq, error in idt_errors.items():
for idt_seq in sorted(idt_errors.keys()):
error = idt_errors[idt_seq]
try:
mol = indigo.loadIdt(idt_seq)
print("Test %s failed: exception expected." % idt_seq)
except IndigoException as e:
text = getIndigoExceptionText(e)
if text == error:
print("Got expected error '%s'" % error)
if error in text:
print("Test '%s': got expected error '%s'" % (idt_seq, error))
else:
print("Expected error '%s' but got '%s'" % (error, text))
print(
"Test '%s': expected error '%s' but got '%s'"
% (idt_seq, error, text)
)
84 changes: 65 additions & 19 deletions api/tests/integration/tests/formats/ket_to_idt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,77 @@ def find_diff(a, b):
os.path.join(os.path.abspath(__file__), "..", "..", "..", "common")
)
)
from env_indigo import Indigo, joinPathPy # noqa
from env_indigo import ( # noqa
Indigo,
IndigoException,
getIndigoExceptionText,
joinPathPy,
)

indigo = Indigo()
indigo.setOption("ignore-stereochemistry-errors", True)

print("*** KET to IDT ***")

root = joinPathPy("molecules/", __file__)
ref_path = joinPathPy("ref/", __file__)

files = [
"1654-dna-to-idt",
]

files.sort()
for filename in files:
mol = indigo.loadMoleculeFromFile(os.path.join(root, filename + ".ket"))
# with open(os.path.join(ref_path, filename) + ".idt", "w") as file:
# file.write(mol.idt())
with open(os.path.join(ref_path, filename) + ".idt", "r") as file:
idt_ref = file.read()
ref = joinPathPy("ref/", __file__)

indigo.loadMoleculeFromFile(os.path.join(ref, "monomer_library.ket"))

# Expected IDT output, keyed by the base name of a reference KET file.
# Each "<key>.ket" is loaded from the `ref` directory, converted with
# mol.idt(), and the result is compared for exact equality with the value here.
# same ref ket files used to check idt-to-ket and to check ket-to-idt
# NOTE(review): "idt_singe_32moera_nucleoside" looks like a misspelling of
# "single", but the key doubles as the file name and the reference listings
# use the same spelling -- rename the file and the listings together, if at all.
idt_data = {
"idt_single_nucleoside": "A",
"idt_bases": "ATCGUI",
"idt_prefix_suffix": "mA*rT*+C*G*+UrImA",
"idt_modifications": "/52MOErA/*/i2MOErA//32MOErA/",
"idt_52moera_with_3phos": "/52MOErA//3Phos/",
"idt_singe_32moera_nucleoside": "/32MOErA/",
"idt_std_phosphates": "/5Phos/ATG/3Phos/",
"idt_mod_phosphates": "/5Phos//i2MOErC//3Phos/",
"idt_mixed": "/5Phos/+A*/i2MOErA/*rG/3Phos/",
"idt_many_molecules": "ACTG\n/52MOErA/*AU/3Phos/\nAC/i2MOErC//3Phos/\nTACG",
}

for filename in sorted(idt_data.keys()):
mol = indigo.loadMoleculeFromFile(os.path.join(ref, filename + ".ket"))
idt = mol.idt()
diff = find_diff(idt_ref, idt)
if not diff:
print(filename + ".idt:SUCCEED")
idt_ref = idt_data[filename]
if idt_ref == idt:
print(filename + ".ket:SUCCEED")
else:
print(filename + ".idt:FAILED")
print(diff)
print(
"%s.idt FAILED : expected '%s', got '%s'"
% (filename, idt_ref, idt)
)

# Negative cases: base name of a KET file under `root` -> text that must
# appear somewhere in the exception message produced while loading the file
# or exporting it to IDT.
idt_errors = {
    "ket-to-idt-r1r1connection": "Canot save molecule in IDT format - sugar MOE connected to monomer MOE with class SUGAR (only base or phosphate expected).",
    "ket-to-idt-peptide": "Canot save molecule in IDT format - AA monomer DPhe4C cannot be first.",
    "ket-to-idt-two-bases": "Canot save molecule in IDT format - sugar R with two base connected A and C.",
    "ket-to-idt-invalid-posphates": "Canot save molecule in IDT format - sugar R with too much phosphates connected P and P.",
    "ket-to-idt-invalid-last-phosphate": "Canot save molecule in IDT format - phosphate sP cannot be last monomer in sequence.",
    "ket-to-idt-invalid-nucleotide": "IDT alias for group sugar:m2e2r base:z8c3G phosphate:mepo2 not found.",
    "ket-to-idt-invalid-sugar-phosphate": "IDT alias for group sugar:m2e2r phosphate:mepo2 not found.",
    "ket-to-idt-invalid-sugar": "IDT alias for sugar:m2e2r not found.",
    "ket-to-idt-invalid-sugar-base": "IDT alias for group sugar:m2e2r base:z8c3G not found.",
}

# Walk the cases in sorted key order so the printed report is stable
# regardless of the order the dict was written in.
for filename, error in sorted(idt_errors.items()):
    ket_path = os.path.join(root, filename + ".ket")
    try:
        mol = indigo.loadMoleculeFromFile(ket_path)
        idt = mol.idt()
    except IndigoException as exc:
        text = getIndigoExceptionText(exc)
        # Substring match: the raised text may carry extra context around
        # the expected message.
        if error not in text:
            print(
                "Test %s: expected error '%s' but got '%s'"
                % (filename, error, text)
            )
        else:
            print("Test %s: got expected error '%s'" % (filename, error))
    else:
        # No exception was raised: report the IDT string that was produced.
        print(
            "Test %s failed: exception expected but got next idt - '%s'."
            % (filename, idt)
        )
1 change: 0 additions & 1 deletion api/tests/integration/tests/formats/molecules/idt_acg.idt

This file was deleted.

This file was deleted.

Loading
Loading