in core/indigo-core/molecule/src/sequence_saver.cpp [867:1157]
void SequenceSaver::saveIdt(KetDocument& doc, std::vector<std::deque<std::string>> sequences, std::string& seq_text)
{
auto& monomer_templates = doc.templates();
auto& variant_monomer_templates = doc.ambiguousTemplates();
auto& monomers = doc.monomers();
if (doc.nonSequenceConnections().size() > 0)
throw Error("Cannot save in IDT format - nonstandard connection found.");
for (auto& sequence : sequences)
{
std::string seq_string;
IdtModification modification = IdtModification::FIVE_PRIME_END;
std::map<std::string, std::string> custom_variants;
char custom_amb_monomers = 0;
while (sequence.size() > 0)
{
auto monomer_id = sequence.front();
sequence.pop_front();
MonomerClass monomer_class = doc.getMonomerClass(monomer_id);
auto& monomer = monomers.at(monomer_id)->alias();
bool standard_sugar = true;
bool standard_base = true;
bool standard_phosphate = true;
std::string sugar;
std::string base;
std::string phosphate;
IdtModification possible_modification = modification;
if (sequence.size() == 0) // last monomer
if (seq_string.size() > 0)
modification = IdtModification::THREE_PRIME_END;
else // corner case - only one monomer
possible_modification = IdtModification::THREE_PRIME_END;
if (monomer_class == MonomerClass::Phosphate)
{
if (seq_string.size() > 0 && sequence.size()) // Inside the sequence
throw Error("Cannot save molecule in IDT format - expected sugar but found phosphate %s.", monomer.c_str());
// first and last monomer can be phosphate "P" only
if (monomer != "P")
{
if (seq_string.size() > 0)
throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", monomer.c_str());
throw Error("Cannot save molecule in IDT format - phosphate %s cannot be first monomer in sequence.", monomer.c_str());
}
// This is 'P' at one of the end
if (seq_string.size() == 0) // First monomer
{
seq_string += "/5Phos/";
modification = IdtModification::INTERNAL;
}
else
{
seq_string += "/3Phos/";
modification = IdtModification::THREE_PRIME_END;
}
continue;
}
else if (monomer_class == MonomerClass::CHEM || monomer_class == MonomerClass::DNA || monomer_class == MonomerClass::RNA)
{
auto write_name = [&](const IdtAlias& idtAlias) -> bool {
bool has_modification = idtAlias.hasModification(modification);
if (has_modification || (possible_modification != modification && idtAlias.hasModification(possible_modification)))
{
const std::string& idt_alias =
has_modification ? idtAlias.getModification(modification) : idtAlias.getModification(possible_modification);
seq_string += '/';
seq_string += idt_alias;
seq_string += '/';
return true;
}
return false;
};
// Try to find in library
const std::string& lib_monomer_id = _library.getMonomerTemplateIdByAlias(monomer_class, monomer);
if (lib_monomer_id.size()) // Monomer in library
{
if (write_name(_library.getMonomerTemplateById(lib_monomer_id).idtAlias()))
{
if (modification == IdtModification::FIVE_PRIME_END)
modification = IdtModification::INTERNAL;
continue;
}
}
// Check template for IdtAlias
auto& monomer_template = doc.getMonomerTemplate(monomers.at(monomer_id)->templateId());
if (write_name(monomer_template.idtAlias()))
{
modification = IdtModification::INTERNAL;
continue;
}
else
{
if (monomer_template.templateType() == KetBaseMonomerTemplate::TemplateType::MonomerTemplate &&
static_cast<const MonomerTemplate&>(monomer_template).unresolved())
throw Error("Unresolved monomer '%s' has no IDT alias.", monomer.c_str());
else if (monomer_class == MonomerClass::DNA || monomer_class == MonomerClass::RNA)
throw Error("Nucleotide '%s' has no IDT alias.", monomer.c_str());
else // CHEM
throw Error("Chem '%s' has no IDT alias.", monomer.c_str());
}
}
else if (monomer_class != MonomerClass::Sugar)
{
throw Error("Cannot save molecule in IDT format - expected sugar but found %s monomer %s.",
MonomerTemplate::MonomerClassToStr(monomer_class).c_str(), monomer.c_str());
}
sugar = monomer;
if (IDT_STANDARD_SUGARS.count(monomer) == 0)
standard_sugar = false;
bool variant_base = false;
if (sequence.size() > 0)
{ // process base
auto base_id = sequence.front();
if (doc.getMonomerClass(base_id) == MonomerClass::Base)
{
const auto& base_monomer = *monomers.at(base_id);
base = base_monomer.alias();
sequence.pop_front();
if (IDT_STANDARD_BASES.count(base) == 0 && STANDARD_MIXED_BASES.count(base) == 0)
standard_base = false;
if (base_monomer.monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
{
variant_base = true;
std::string template_id = monomers.at(base_id)->templateId();
if (custom_variants.count(template_id) > 0)
{
base = custom_variants.at(template_id);
}
else
{
std::array<float, 4> ratios{0, 0, 0, 0};
bool has_ratio = false;
std::set<std::string> aliases;
std::string s_aliases;
const auto& ambiguous_template = doc.ambiguousTemplates().at(template_id);
if (ambiguous_template.subtype() != "mixture")
throw Error("Cannot save IDT - only mixture supported but found %s.", ambiguous_template.subtype().c_str());
for (auto& option : ambiguous_template.options())
{
auto& opt_alias = doc.templates().at(option.templateId()).getStringProp("alias");
aliases.emplace(opt_alias);
if (s_aliases.size() > 0)
s_aliases += ", ";
s_aliases += opt_alias;
const auto& it = IDT_BASE_TO_RATIO_IDX.find(opt_alias);
if (it == IDT_BASE_TO_RATIO_IDX.end())
throw Error("Cannot save IDT - unknown monomer template %s", opt_alias.c_str());
auto ratio = option.ratio();
if (ratio.has_value())
{
ratios[it->second] = ratio.value();
has_ratio = true;
}
else if (has_ratio)
throw Error("Cannot save IDT - ambiguous monomer template '%s' use template '%s' without ratio.", base.c_str(),
opt_alias.c_str());
}
if (STANDARD_MIXED_BASES_TO_ALIAS.count(aliases) == 0)
throw Error("Cannot save IDT - unknown mixture of monomers %s", s_aliases.c_str());
base = STANDARD_MIXED_BASES_TO_ALIAS.at(aliases);
if (RNA_DNA_MIXED_BASES.count(base) == 0)
{
if (base[0] == 'r')
{
base.erase(base.begin());
if (sugar != "R")
throw Error("Cannot save IDT - RNA ambigous base connected to DNA sugar.");
}
else if (sugar == "R")
throw Error("Cannot save IDT - DNA ambigous base connected to RNA sugar.");
}
if (has_ratio)
{
std::string base_short = '(' + base;
base_short += '1' + custom_amb_monomers++;
base = base_short;
base += ':';
// add ratios
for (auto r : ratios)
{
int ir = static_cast<int>(std::round(r));
std::string sr = std::to_string(ir);
if (sr.size() < 2)
sr = '0' + sr;
base += sr;
}
base += ')';
base_short += ')';
custom_variants.emplace(template_id, base_short);
}
else
custom_variants.emplace(template_id, base);
}
}
}
}
else
{
throw Error("Cannot save molecule in IDT format - sugar whithout base.");
}
if (sequence.size() > 0)
{ // process phosphate
auto phosphate_id = sequence.front();
sequence.pop_front();
MonomerClass phosphate_class = doc.getMonomerClass(phosphate_id);
if (phosphate_class != MonomerClass::Phosphate)
throw Error("Cannot save molecule in IDT format - phosphate expected between sugars but %s monomer %s found.",
MonomerTemplate::MonomerClassToStr(phosphate_class).c_str(), monomer.c_str());
phosphate = monomers.at(phosphate_id)->alias();
if (phosphate != "P" && phosphate != "sP")
standard_phosphate = false;
}
else
{
modification = IdtModification::THREE_PRIME_END;
phosphate = "";
standard_phosphate = true;
}
bool add_asterisk = false;
if (phosphate == "sP")
{
phosphate = "P"; // Assume that modified monomers always contains P and modified to sP with *. TODO: confirm it with BA
add_asterisk = true;
}
if ((standard_base || variant_base) && standard_phosphate && standard_sugar)
{
sugar = IDT_STANDARD_SUGARS.at(sugar);
if (sugar.size())
seq_string += sugar;
seq_string += base == "In" ? "I" : base; // Inosine coded as I in IDT
if (sequence.size() == 0 && phosphate.size())
{
if (phosphate != "P" || add_asterisk)
throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", monomer.c_str());
seq_string += "/3Phos/";
}
}
else
{
// Try to find sugar,base,phosphate group template
const std::string& sugar_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Sugar, sugar);
const std::string& phosphate_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, phosphate);
std::string base_id;
if (base.size())
base_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Base, base);
const std::string& idt_alias = _library.getIdtAliasByModification(modification, sugar_id, base_id, phosphate_id);
if (idt_alias.size())
{
seq_string += '/';
seq_string += idt_alias;
seq_string += '/';
}
else
{
if (base.size())
{
if (phosphate.size())
throw Error("IDT alias for group sugar:%s base:%s phosphate:%s not found.", sugar.c_str(), base.c_str(), phosphate.c_str());
else
throw Error("IDT alias for group sugar:%s base:%s not found.", sugar.c_str(), base.c_str());
}
else
{
if (phosphate.size())
throw Error("IDT alias for group sugar:%s phosphate:%s not found.", sugar.c_str(), phosphate.c_str());
else
throw Error("IDT alias for sugar:%s not found.", sugar.c_str());
}
}
}
if (add_asterisk)
{
seq_string += "*";
phosphate = "sP";
}
if (modification == IdtModification::FIVE_PRIME_END)
modification = IdtModification::INTERNAL;
}
if (seq_text.size() > 0)
seq_text += "\n";
seq_text += seq_string;
}
}