diff --git a/api/tests/integration/ref/formats/ket_to_idt.py.out b/api/tests/integration/ref/formats/ket_to_idt.py.out index 223ce78375..04c83091ec 100644 --- a/api/tests/integration/ref/formats/ket_to_idt.py.out +++ b/api/tests/integration/ref/formats/ket_to_idt.py.out @@ -18,6 +18,7 @@ idt_i2moera_t.ket:SUCCEED idt_many_molecules.ket:SUCCEED idt_mixed.ket:SUCCEED idt_mixed_custom.ket:SUCCEED +idt_mixed_ketcher.ket:SUCCEED idt_mixed_std.ket:SUCCEED idt_mod_phosphates.ket:SUCCEED idt_modifications.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/ket_to_idt.py b/api/tests/integration/tests/formats/ket_to_idt.py index f26f155180..922da746ff 100644 --- a/api/tests/integration/tests/formats/ket_to_idt.py +++ b/api/tests/integration/tests/formats/ket_to_idt.py @@ -64,6 +64,7 @@ def find_diff(a, b): "idt_mixed_std": "ARAS", "idt_mixed_custom": "(N1:10203050)(N1)N", "idt_rna_dna_mixed_custom": "r(R1:50003000)(R1)", + "idt_mixed_ketcher": "KrK(K1:00003070)r(K2:00003070)", } for filename in sorted(idt_data.keys()): diff --git a/api/tests/integration/tests/formats/ref/idt_mixed_ketcher.ket b/api/tests/integration/tests/formats/ref/idt_mixed_ketcher.ket new file mode 100644 index 0000000000..52e17dcad5 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/idt_mixed_ketcher.ket @@ -0,0 +1,1452 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer0" + }, + { + "$ref": "ambiguousMonomer-1" + }, + { + "$ref": "monomer2" + }, + { + "$ref": "monomer3" + }, + { + "$ref": "ambiguousMonomer-4" + }, + { + "$ref": "monomer5" + }, + { + "$ref": "monomer6" + }, + { + "$ref": "ambiguousMonomer-7" + }, + { + "$ref": "monomer8" + }, + { + "$ref": "monomer9" + }, + { + "$ref": "ambiguousMonomer-10" + } + ], + "connections": [ + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "ambiguousMonomer-1", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer0", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer2", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "ambiguousMonomer-4", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer2", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer3", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer3", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer5", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "ambiguousMonomer-7", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer5", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer6", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer6", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer8", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer9", + "attachmentPointId": "R3" + }, + "endpoint2": { + "monomerId": "ambiguousMonomer-10", + "attachmentPointId": "R1" + } + }, + { + "connectionType": "single", + "endpoint1": { + "monomerId": "monomer8", + "attachmentPointId": "R2" + }, + "endpoint2": { + "monomerId": "monomer9", + "attachmentPointId": "R1" + } + } + ], + "templates": [ + { + "$ref": "monomerTemplate-G___Guanine" + }, + { + "$ref": "monomerTemplate-T___Thymine" + }, + { + "$ref": "monomerTemplate-dR___Deoxy-Ribose" + }, + { + "$ref": "monomerTemplate-P___Phosphate" + }, + { + "$ref": "monomerTemplate-U___Uracil" + }, + { + "$ref": "monomerTemplate-R___Ribose" + }, + { + "$ref": "ambiguousMonomerTemplate-K" + }, + { + "$ref": "ambiguousMonomerTemplate-rK" + }, + { + "$ref": "ambiguousMonomerTemplate-(K)" + }, + { + "$ref": "ambiguousMonomerTemplate-r(K)" + } + ] + }, + "monomer0": { + "type": "monomer", + "id": "0", + "seqid": 0, + "position": { + "x": 0.000000, + "y": -0.000000 + }, + "alias": "dR", + "templateId": "dR___Deoxy-Ribose" + }, + "ambiguousMonomer-1": { + "type": "ambiguousMonomer", + "id": "1", + "position": { + "x": 0.000000, + "y": -1.600000 + }, + "seqid": 0, + "alias": "%", + "templateId": "K" + }, + "monomer2": { + "type": "monomer", + "id": "2", + "seqid": 0, + "position": { + "x": 1.600000, + "y": -0.000000 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer3": { + "type": "monomer", + "id": "3", + "seqid": 1, + "position": { + "x": 3.200000, + "y": -0.000000 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "ambiguousMonomer-4": { + "type": "ambiguousMonomer", + "id": "4", + "position": { + "x": 3.200000, + "y": -1.600000 + }, + "seqid": 1, + "alias": "%", + "templateId": "rK" + }, + "monomer5": { + "type": "monomer", + "id": "5", + "seqid": 1, + "position": { + "x": 4.800000, + "y": -0.000000 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer6": { + "type": "monomer", + "id": "6", + "seqid": 2, + "position": { + "x": 6.400000, + "y": -0.000000 + }, + "alias": "dR", + "templateId": "dR___Deoxy-Ribose" + }, + "ambiguousMonomer-7": { + "type": "ambiguousMonomer", + "id": "7", + "position": { + "x": 6.400000, + "y": -1.600000 + }, + "seqid": 2, + "alias": "%", + "templateId": "(K)" + }, + "monomer8": { + "type": "monomer", + "id": "8", + "seqid": 2, + "position": { + "x": 8.000000, + "y": -0.000000 + }, + "alias": "P", + "templateId": "P___Phosphate" + }, + "monomer9": { + "type": "monomer", + "id": "9", + "seqid": 3, + "position": { + "x": 9.600000, + "y": -0.000000 + }, + "alias": "R", + "templateId": "R___Ribose" + }, + "ambiguousMonomer-10": { + "type": "ambiguousMonomer", + "id": "10", + "position": { + "x": 9.600000, + "y": -1.600000 + }, + "seqid": 3, + "alias": "%", + "templateId": "r(K)" + }, + "monomerTemplate-G___Guanine": { + "type": "monomerTemplate", + "id": "G___Guanine", + "class": "Base", + "classHELM": "RNA", + "fullName": "Guanine", + "alias": "G", + "naturalAnalogShort": "G", + "attachmentPoints": [ + { + "attachmentAtom": 6, + "type": "left", + "leavingGroup": { + "atoms": [ + 11 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.035400, + 0.249800, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.079200, + -0.754000, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -1.505700, + -0.290600, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -1.817700, + 1.176600, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.703100, + 2.180400, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + 0.723500, + 1.717000, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -2.387100, + -1.503400, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -1.505300, + -2.716800, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -0.078700, + -2.253200, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 2.176800, + -0.120900, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -0.952700, + 3.354200, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + -3.587100, + -1.503400, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 2, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 11 + ] + }, + { + "type": 2, + "atoms": [ + 7, + 8 + ] + } + ] + }, + "monomerTemplate-T___Thymine": { + "type": "monomerTemplate", + "id": "T___Thymine", + "class": "Base", + "classHELM": "RNA", + "fullName": "Thymine", + "alias": "T", + "naturalAnalogShort": "T", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "type": "left", + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.861700, + 1.349900, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + 1.111700, + 0.050900, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.388300, + 0.050900, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -1.138200, + 1.350000, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.388200, + 2.649000, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + 1.111700, + 2.648900, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 3.061800, + 1.349900, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -0.988200, + 3.688200, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + -2.338300, + 1.350000, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + 1.711700, + -0.988400, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 9 + ] + } + ] + }, + "monomerTemplate-dR___Deoxy-Ribose": { + "type": "monomerTemplate", + "id": "dR___Deoxy-Ribose", + "class": "Sugar", + "classHELM": "RNA", + "fullName": "Deoxy-Ribose", + "alias": "dR", + "naturalAnalogShort": "R", + "attachmentPoints": [ + { + "attachmentAtom": 8, + "type": "left", + "leavingGroup": { + "atoms": [ + 9 + ] + } + }, + { + "attachmentAtom": 5, + "type": "right", + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 2, + "type": "side", + "leavingGroup": { + "atoms": [ + 7 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -0.878800, + -1.208000, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.366800, + 0.201900, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.303800, + -2.130700, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.132300, + 0.150600, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.546800, + -1.291000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 2.051500, + 1.333800, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -1.208100, + 1.441700, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.262800, + -3.329900, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -2.705000, + 1.333800, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + -3.378800, + 2.326700, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + 3.240300, + 1.170900, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 6 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 10 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 8, + 9 + ] + } + ] + }, + "monomerTemplate-P___Phosphate": { + "type": "monomerTemplate", + "id": "P___Phosphate", + "class": "Phosphate", + "classHELM": "RNA", + "fullName": "Phosphate", + "alias": "P", + "naturalAnalogShort": "P", + "attachmentPoints": [ + { + "attachmentAtom": 0, + "type": "left", + "leavingGroup": { + "atoms": [ + 1 + ] + } + }, + { + "attachmentAtom": 0, + "type": "right", + "leavingGroup": { + "atoms": [ + 3 + ] + } + } + ], + "atoms": [ + { + "label": "P", + "location": [ + -0.239900, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -1.439900, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.359800, + -1.039400, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.960100, + 0.000000, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.359800, + 1.039400, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 4 + ] + } + ] + }, + "monomerTemplate-U___Uracil": { + "type": "monomerTemplate", + "id": "U___Uracil", + "class": "Base", + "classHELM": "RNA", + "fullName": "Uracil", + "alias": "U", + "naturalAnalogShort": "U", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "type": "left", + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "C", + "location": [ + 1.861700, + 1.349900, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + 1.111700, + 0.050900, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.388300, + 0.050900, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + -1.138200, + 1.350000, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.388200, + 2.649000, + 0.000000 + ] + }, + { + "label": "N", + "location": [ + 1.111700, + 2.648900, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 3.061800, + 1.349900, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -0.988200, + 3.688200, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + -2.338300, + 1.350000, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 0, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 5 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 8 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 4, + 5 + ] + } + ] + }, + "monomerTemplate-R___Ribose": { + "type": "monomerTemplate", + "id": "R___Ribose", + "class": "Sugar", + "classHELM": "RNA", + "fullName": "Ribose", + "alias": "R", + "naturalAnalogShort": "R", + "attachmentPoints": [ + { + "attachmentAtom": 9, + "type": "left", + "leavingGroup": { + "atoms": [ + 10 + ] + } + }, + { + "attachmentAtom": 5, + "type": "right", + "leavingGroup": { + "atoms": [ + 11 + ] + } + }, + { + "attachmentAtom": 2, + "type": "side", + "leavingGroup": { + "atoms": [ + 8 + ] + } + } + ], + "atoms": [ + { + "label": "O", + "location": [ + -1.101700, + -1.066300, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -0.589700, + 0.343600, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.080900, + -1.988900, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 0.909500, + 0.292400, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "C", + "location": [ + 1.323900, + -1.149300, + 0.000000 + ], + "stereoLabel": "abs" + }, + { + "label": "O", + "location": [ + 1.828500, + 1.475500, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 2.451800, + -1.558900, + 0.000000 + ] + }, + { + "label": "C", + "location": [ + -1.431000, + 1.583400, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + 0.039900, + -3.188100, + 0.000000 + ] + }, + { + "label": "O", + "location": [ + -2.927900, + 1.475500, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + -3.601700, + 2.468400, + 0.000000 + ] + }, + { + "label": "H", + "location": [ + 3.017400, + 1.312500, + 0.000000 + ] + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 1 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 1, + 7 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 2, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 8 + ], + "stereo": 6 + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 4, + 6 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 11 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 9, + 10 + ] + } + ] + }, + "ambiguousMonomerTemplate-K": { + "type": "ambiguousMonomerTemplate", + "subtype": "mixture", + "id": "K", + "alias": "%", + "options": [ + { + "templateId": "G___Guanine" + }, + { + "templateId": "T___Thymine" + } + ] + }, + "ambiguousMonomerTemplate-rK": { + "type": "ambiguousMonomerTemplate", + "subtype": "mixture", + "id": "rK", + "alias": "%", + "options": [ + { + "templateId": "G___Guanine" + }, + { + "templateId": "U___Uracil" + } + ] + }, + "ambiguousMonomerTemplate-(K)": { + "type": "ambiguousMonomerTemplate", + "subtype": "mixture", + "id": "(K)", + "alias": "%", + "options": [ + { + "templateId": "G___Guanine", + "ratio": 30.000000 + }, + { + "templateId": "T___Thymine", + "ratio": 70.000000 + } + ] + }, + "ambiguousMonomerTemplate-r(K)": { + "type": "ambiguousMonomerTemplate", + "subtype": "mixture", + "id": "r(K)", + "alias": "%", + "options": [ + { + "templateId": "G___Guanine", + "ratio": 30.000000 + }, + { + "templateId": "U___Uracil", + "ratio": 70.000000 + } + ] + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/src/sequence_loader.cpp b/core/indigo-core/molecule/src/sequence_loader.cpp index e22eff7da7..86088d2c3d 100644 --- a/core/indigo-core/molecule/src/sequence_loader.cpp +++ b/core/indigo-core/molecule/src/sequence_loader.cpp @@ -1094,9 +1094,7 @@ void SequenceLoader::loadIdt(KetDocument& document) } } if (sugar == "R" && RNA_DNA_MIXED_BASES.count(mixed_base) == 0) - { idt_alias = 'r' + idt_alias; - } if (!document.hasVariantMonomerTemplate(idt_alias)) { auto it = STANDARD_MIXED_BASES.find(mixed_base); @@ -1106,7 +1104,7 @@ void SequenceLoader::loadIdt(KetDocument& document) std::vector options; for (auto template_alias : it->second) { - if (sugar == "r" && template_alias == "T") // U instead of T for RNA + if (sugar == "R" && template_alias == "T") // U instead of T for RNA template_alias = "U"; auto& template_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Base, template_alias); if (template_id.size() == 0) @@ -1125,8 +1123,11 @@ void SequenceLoader::loadIdt(KetDocument& document) templ.setAttachmentPoints(aps); _alias_to_id.emplace(idt_alias, idt_alias); } - else if (ratios.has_value()) - throw Error("Variant monomer %s redefinion", idt_alias.c_str()); + else + { + if (ratios.has_value()) + throw Error("Variant monomer %s redefinion", idt_alias.c_str()); + } } base = idt_alias; diff --git a/core/indigo-core/molecule/src/sequence_saver.cpp b/core/indigo-core/molecule/src/sequence_saver.cpp index 025058d850..1457ddc4e0 100644 --- a/core/indigo-core/molecule/src/sequence_saver.cpp +++ b/core/indigo-core/molecule/src/sequence_saver.cpp @@ -851,7 +851,8 @@ void SequenceSaver::saveIdt(KetDocument& doc, std::vector custom_variants; + std::map custom_variants; + char custom_amb_monomers = 0; while (sequence.size() > 0) { auto monomer_id = sequence.front(); @@ -948,41 +949,81 @@ void SequenceSaver::saveIdt(KetDocument& doc, std::vectoralias(); + const auto& base_monomer = *monomers.at(base_id); + base = base_monomer.alias(); sequence.pop_front(); if (IDT_STANDARD_BASES.count(base) == 0 && STANDARD_MIXED_BASES.count(base) == 0) standard_base = false; - if (base.back() == ')') + if (base_monomer.monomerType() == KetBaseMonomer::MonomerType::VarianMonomer) { variant_base = true; - if (custom_variants.count(base) == 0) + std::string template_id = monomers.at(base_id)->templateId(); + if (custom_variants.count(template_id) > 0) { - custom_variants.emplace(base); - std::array ratios; - for (auto& option : doc.variantTemplates().at(monomers.at(base_id)->templateId()).options()) + base = custom_variants.at(template_id); + } + else + { + std::array ratios{0, 0, 0, 0}; + bool has_ratio = false; + std::set aliases; + std::string s_aliases; + for (auto& option : doc.variantTemplates().at(template_id).options()) { auto& opt_alias = doc.templates().at(option.templateId()).getStringProp("alias"); + aliases.emplace(opt_alias); + if (s_aliases.size() > 0) + s_aliases += ", "; + s_aliases += opt_alias; const auto& it = IDT_BASE_TO_RATIO_IDX.find(opt_alias); if (it == IDT_BASE_TO_RATIO_IDX.end()) - throw Error("Cannot save IDT - unknown mnomer template %s", opt_alias.c_str()); + throw Error("Cannot save IDT - unknown monomer template %s", opt_alias.c_str()); auto ratio = option.ratio(); - if (!ratio.has_value()) + + if (ratio.has_value()) + { + ratios[it->second] = ratio.value(); + has_ratio = true; + } + else if (has_ratio) throw Error("Cannot save IDT - variant monomer template '%s' use template '%s' without ratio.", base.c_str(), opt_alias.c_str()); - ratios[it->second] = ratio.value(); } - base.pop_back(); // remove ')' - base += ':'; - // add ratios - for (auto r : ratios) + if (STANDARD_MIXED_BASES_TO_ALIAS.count(aliases) == 0) + throw Error("Cannot save IDT - unknown mixture of monomers %s", s_aliases.c_str()); + base = STANDARD_MIXED_BASES_TO_ALIAS.at(aliases); + if (RNA_DNA_MIXED_BASES.count(base) == 0) { - int ir = static_cast(std::round(r)); - std::string sr = std::to_string(ir); - if (sr.size() < 2) - sr = '0' + sr; - base += sr; + if (base[0] == 'r') + { + base.erase(base.begin()); + if (sugar != "R") + throw Error("Cannot save IDT - RNA ambigous base connected to DNA sugar."); + } + else if (sugar == "R") + throw Error("Cannot save IDT - DNA ambigous base connected to RNA sugar."); + } + if (has_ratio) + { + std::string base_short = '(' + base; + base_short += '1' + custom_amb_monomers++; + base = base_short; + base += ':'; + // add ratios + for (auto r : ratios) + { + int ir = static_cast(std::round(r)); + std::string sr = std::to_string(ir); + if (sr.size() < 2) + sr = '0' + sr; + base += sr; + } + base += ')'; + base_short += ')'; + custom_variants.emplace(template_id, base_short); } - base += ')'; + else + custom_variants.emplace(template_id, base); } } }