Skip to content

Commit

Permalink
Backmerge: #2337 - Export of HELM with inline SMILES contains "Mod0" …
Browse files Browse the repository at this point in the history
…instead of SMILES

Backmerge to master
  • Loading branch information
AliaksandrDziarkach committed Sep 17, 2024
1 parent e314cdb commit cca2772
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 12 deletions.
2 changes: 2 additions & 0 deletions api/tests/integration/ref/formats/ket_to_helm.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ helm_multi_char_rna.ket:SUCCEED
helm_peptide.ket:SUCCEED
helm_rna_without_base.ket:SUCCEED
helm_simple_rna.ket:SUCCEED
helm_smiles.ket:SUCCEED
helm_smiles_sugar.ket:SUCCEED
rna_variants.ket:SUCCEED
2 changes: 2 additions & 0 deletions api/tests/integration/tests/formats/ket_to_helm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def find_diff(a, b):
"rna_variants": "RNA1{R(A,G)P.R(G,T)P.R(A,C,G,T)}$$$$V2.0",
"helm_monomer_molecule": "PEPTIDE1{A}|PEPTIDE2{G}|CHEM1{[C(N[*:2])=C[*:1] |$;;_R2;;_R1$|]}$CHEM1,PEPTIDE1,1:R2-1:R1|PEPTIDE2,CHEM1,1:R2-1:R1$$$V2.0",
"helm_fractional_ratio": "PEPTIDE1{(A:1.5+C:.1+G:3.)}$$$$V2.0",
"helm_smiles": "PEPTIDE1{G.[[*:1]NC(C(=O)[*:2])C=O |$_R1;;;;;_R2;;$|].C}|PEPTIDE2{G.[[*:1]NC(C(=O)[*:2])C=O |$_R1;;;;;_R2;;$|].C}$$$$V2.0",
"helm_smiles_sugar": "RNA1{[C(C(CO[*:1])O[*:2])[*:3] |$;;;;_R1;;_R2;_R3$|](A)P}$$$$V2.0",
}

for filename in sorted(helm_data.keys()):
Expand Down
4 changes: 2 additions & 2 deletions core/indigo-core/molecule/ket_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ namespace indigo
enum class MonomerType
{
Monomer,
VarianMonomer,
AmbiguousMonomer,
};

KetBaseMonomer(MonomerType monomer_type, const std::string& id, const std::string& alias, const std::string& template_id)
Expand Down Expand Up @@ -980,7 +980,7 @@ namespace indigo
inline static std::string ref_prefix = "ambiguousMonomer-";

KetVariantMonomer(const std::string& id, const std::string& alias, const std::string& template_id)
: KetBaseMonomer(MonomerType::VarianMonomer, id, alias, template_id)
: KetBaseMonomer(MonomerType::AmbiguousMonomer, id, alias, template_id)
{
_ref = ref_prefix + _id;
};
Expand Down
5 changes: 5 additions & 0 deletions core/indigo-core/molecule/monomers_template_library.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ namespace indigo
const std::string& getIdtAliasByModification(IdtModification modification, const std::string sugar_id, const std::string base_id,
const std::string phosphate_id);

const std::map<std::string, MonomerTemplate>& monomerTemplates()
{
return _monomer_templates;
};

private:
std::map<std::string, MonomerTemplate> _monomer_templates;
std::map<std::string, MonomerGroupTemplate> _monomer_group_templates;
Expand Down
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/sequence_saver.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ namespace indigo
class Output;
class BaseMolecule;
class KetDocument;
class KetBaseMonomer;

class DLLEXPORT SequenceSaver
{
Expand Down Expand Up @@ -68,6 +69,7 @@ namespace indigo
private:
std::string getMonomerAlias(BaseMolecule& mol, int atom_idx);
std::string getHelmPolymerClass(BaseMolecule& mol, int atom_idx);
void add_monomer(KetDocument& document, const std::unique_ptr<KetBaseMonomer>& monomer, std::string& helm_string);
SequenceSaver(const SequenceSaver&); // no implicit copy
Output& _output;
const MonomerTemplates& _mon_lib;
Expand Down
10 changes: 9 additions & 1 deletion core/indigo-core/molecule/src/ket_document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include "molecule/molecule.h"
#include "molecule/molecule_json_loader.h"

#ifdef _MSC_VER
#pragma warning(push)
#endif

using namespace indigo;

IMPL_ERROR(KetDocument, "Ket Document")
Expand Down Expand Up @@ -330,7 +334,7 @@ MonomerClass KetDocument::getMonomerClass(const KetBaseMonomer& monomer) const
{
if (monomer.monomerType() == KetBaseMonomer::MonomerType::Monomer)
return _templates.at(monomer.templateId()).monomerClass();
else if (monomer.monomerType() == KetBaseMonomer::MonomerType::VarianMonomer)
else if (monomer.monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
return _variant_templates.at(monomer.templateId()).monomerClass();
else
throw Error("Unknonwn monomer type");
Expand Down Expand Up @@ -489,3 +493,7 @@ const std::string& KetDocument::monomerIdByRef(const std::string& ref)
throw Error("Monomer with ref %s not found", ref.c_str());
return it->second;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif
2 changes: 1 addition & 1 deletion core/indigo-core/molecule/src/ket_document_json_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ void KetDocumentJsonSaver::saveKetDocument(JsonWriter& writer, const KetDocument
auto& monomer = monomers.at(it);
if (monomer->monomerType() == KetBaseMonomer::MonomerType::Monomer)
saveMonomer(writer, *static_cast<KetMonomer*>(monomer.get()));
else if (monomer->monomerType() == KetBaseMonomer::MonomerType::VarianMonomer)
else if (monomer->monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
saveVariantMonomer(writer, *static_cast<KetVariantMonomer*>(monomer.get()));
else
throw Error("Unknown monomer type");
Expand Down
67 changes: 59 additions & 8 deletions core/indigo-core/molecule/src/sequence_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
#include "molecule/monomers_template_library.h"
#include "molecule/smiles_saver.h"

#ifdef _MSC_VER
#pragma warning(push)
#endif

using namespace indigo;

IMPL_ERROR(SequenceSaver, "Sequence saver");
Expand Down Expand Up @@ -278,7 +282,7 @@ std::string SequenceSaver::saveIdt(BaseMolecule& mol, std::deque<int>& sequence)
return seq_string;
}

static inline void add_monomer(std::string& helm_string, const std::string& monomer_alias)
static inline void add_monomer_str(std::string& helm_string, const std::string& monomer_alias)
{
if (monomer_alias.size() == 1)
helm_string += monomer_alias;
Expand Down Expand Up @@ -386,7 +390,7 @@ std::string SequenceSaver::saveHELM(BaseMolecule& mol, std::vector<std::deque<in
}
if (monomer_idx)
helm_string += '.'; // separator between monomers
add_monomer(helm_string, monomer_alias);
add_monomer_str(helm_string, monomer_alias);
monomer_idx++;
atom_idx_to_monomer_info.emplace(std::make_pair(atom_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx)));

Expand All @@ -409,7 +413,7 @@ std::string SequenceSaver::saveHELM(BaseMolecule& mol, std::vector<std::deque<in
if (mon_class == kMonomerClassBASE)
{
helm_string += '('; // branch monomers in ()
add_monomer(helm_string, monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx)));
add_monomer_str(helm_string, monomerAliasByName(mon_class, mol.getTemplateAtom(nei_atom_idx)));
monomer_idx++;
atom_idx_to_monomer_info.emplace(std::make_pair(nei_atom_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx)));
used_atoms.emplace(nei_atom_idx);
Expand All @@ -425,7 +429,7 @@ std::string SequenceSaver::saveHELM(BaseMolecule& mol, std::vector<std::deque<in
}
if (phosphate.size())
{
add_monomer(helm_string, phosphate);
add_monomer_str(helm_string, phosphate);
monomer_idx++;
atom_idx_to_monomer_info.emplace(std::make_pair(phosphate_idx, std::make_tuple(helm_type, polymer_idx, monomer_idx)));
used_atoms.emplace(phosphate_idx);
Expand Down Expand Up @@ -954,7 +958,7 @@ void SequenceSaver::saveIdt(KetDocument& doc, std::vector<std::deque<std::string
sequence.pop_front();
if (IDT_STANDARD_BASES.count(base) == 0 && STANDARD_MIXED_BASES.count(base) == 0)
standard_base = false;
if (base_monomer.monomerType() == KetBaseMonomer::MonomerType::VarianMonomer)
if (base_monomer.monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
{
variant_base = true;
std::string template_id = monomers.at(base_id)->templateId();
Expand Down Expand Up @@ -1140,6 +1144,50 @@ static const char* get_helm_class(MonomerClass monomer_class)
return kHELMPolymerTypeCHEM;
}

// Appends the HELM token for `monomer` to `helm_string`.
//
// The monomer's template is taken from `document.templates()` by the monomer's
// template id. If the monomer library (`_library`) contains a template with the
// same id, the library template's "alias" string property is used as the token.
// Otherwise the template structure is written out as SMILES and that SMILES
// text is used as the token.
//
// A token of exactly one character is appended as-is; any other token
// (including an empty one) is wrapped in square brackets.
//
// document    - KET document that holds the monomer's template.
// monomer     - monomer to export; only templateId() is read here.
// helm_string - in/out: the token is appended to this string.
//
// NOTE(review): `templates().at(...)` presumably throws for an unknown template
// id (std::map-like semantics) - confirm against KetDocument.
void SequenceSaver::add_monomer(KetDocument& document, const std::unique_ptr<KetBaseMonomer>& monomer, std::string& helm_string)
{
    std::string monomer_str;
    const auto& mon_templ = document.templates().at(monomer->templateId());

    // Single lookup in the library instead of count() followed by at().
    const auto& library_templates = _library.monomerTemplates();
    const auto library_it = library_templates.find(mon_templ.id());
    if (library_it != library_templates.end())
    {
        monomer_str = library_it->second.getStringProp("alias");
    }
    else
    {
        // monomer not in library - generate smiles
        auto tgroup = mon_templ.getTGroup();
        // NOTE(review): assumes the fragment is a Molecule (not another BaseMolecule
        // subclass); the cast is unchecked - confirm against getTGroup().
        auto* pmol = static_cast<Molecule*>(tgroup->fragment.get());

        // Convert the attachment points of every superatom (SUP) s-group into
        // R-sites: each leaving atom becomes an R-site atom that allows R-group
        // number (attachment order + 1). The s-group itself is then dropped.
        auto& sgroups = pmol->sgroups;
        for (int i = sgroups.begin(); i != sgroups.end(); i = sgroups.next(i))
        {
            auto& sgroup = sgroups.getSGroup(i);
            if (sgroup.sgroup_type != SGroup::SG_TYPE_SUP)
                continue;
            Superatom& sa = static_cast<Superatom&>(sgroup);
            for (int ap_id = sa.attachment_points.begin(); ap_id != sa.attachment_points.end(); ap_id = sa.attachment_points.next(ap_id))
            {
                auto& ap = sa.attachment_points.at(ap_id);
                int leaving_atom = ap.lvidx;
                int ap_idx = getAttachmentOrder(ap.apid.ptr()) + 1;
                pmol->resetAtom(leaving_atom, ELEM_RSITE);
                pmol->allowRGroupOnRSite(leaving_atom, ap_idx);
            }
            // NOTE(review): removes the current element while iterating; this relies on
            // sgroups.next(i) staying valid after remove(i) - confirm for this container.
            sgroups.remove(i);
        }

        // The SMILES text is written straight into monomer_str.
        StringOutput s_out(monomer_str);
        SmilesSaver saver(s_out);
        saver.separate_rsites = false;
        saver.saveMolecule(*pmol);
    }

    if (monomer_str.size() == 1)
    {
        helm_string += monomer_str;
    }
    else
    {
        // Multi-character aliases and inline SMILES are bracketed.
        helm_string += '[';
        helm_string += monomer_str;
        helm_string += ']';
    }
}

std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::deque<std::string>> sequences)
{
std::string helm_string = "";
Expand Down Expand Up @@ -1193,8 +1241,8 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::dequ
if (monomer_class == MonomerClass::Base)
helm_string += '(';
if (monomer->monomerType() == KetBaseMonomer::MonomerType::Monomer)
add_monomer(helm_string, monomer->alias());
else if (monomer->monomerType() == KetBaseMonomer::MonomerType::VarianMonomer)
add_monomer(document, monomer, helm_string);
else if (monomer->monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
{
const auto& templ = variant_templates.at(monomer->templateId());
if (monomer_class != MonomerClass::Base)
Expand Down Expand Up @@ -1270,7 +1318,6 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::dequ
auto res = mol_atom_to_ap.try_emplace(mol_id);
auto& atom_to_ap = res.first;
static std::string apid_prefix{'R'};
Array<int> leaving_atoms;
for (int ap_id = sa.attachment_points.begin(); ap_id != sa.attachment_points.end(); ap_id = sa.attachment_points.next(ap_id))
{
auto& ap = sa.attachment_points.at(ap_id);
Expand Down Expand Up @@ -1366,3 +1413,7 @@ std::string SequenceSaver::saveHELM(KetDocument& document, std::vector<std::dequ
helm_string += "V2.0";
return helm_string;
}

#ifdef _MSC_VER
#pragma warning(pop)
#endif

0 comments on commit cca2772

Please sign in to comment.