void SequenceSaver::saveIdt()

in core/indigo-core/molecule/src/sequence_saver.cpp [867:1157]


void SequenceSaver::saveIdt(KetDocument& doc, std::vector<std::deque<std::string>> sequences, std::string& seq_text)
{
    auto& monomer_templates = doc.templates();
    auto& variant_monomer_templates = doc.ambiguousTemplates();
    auto& monomers = doc.monomers();
    if (doc.nonSequenceConnections().size() > 0)
        throw Error("Cannot save in IDT format - nonstandard connection found.");
    for (auto& sequence : sequences)
    {
        std::string seq_string;
        IdtModification modification = IdtModification::FIVE_PRIME_END;
        std::map<std::string, std::string> custom_variants;
        char custom_amb_monomers = 0;
        while (sequence.size() > 0)
        {
            auto monomer_id = sequence.front();
            sequence.pop_front();
            MonomerClass monomer_class = doc.getMonomerClass(monomer_id);
            auto& monomer = monomers.at(monomer_id)->alias();
            bool standard_sugar = true;
            bool standard_base = true;
            bool standard_phosphate = true;
            std::string sugar;
            std::string base;
            std::string phosphate;
            IdtModification possible_modification = modification;
            if (sequence.size() == 0) // last monomer
                if (seq_string.size() > 0)
                    modification = IdtModification::THREE_PRIME_END;
                else // corner case - only one monomer
                    possible_modification = IdtModification::THREE_PRIME_END;

            if (monomer_class == MonomerClass::Phosphate)
            {
                if (seq_string.size() > 0 && sequence.size()) // Inside the sequence
                    throw Error("Cannot save molecule in IDT format - expected sugar but found phosphate %s.", monomer.c_str());
                // first and last monomer can be phosphate "P" only
                if (monomer != "P")
                {
                    if (seq_string.size() > 0)
                        throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", monomer.c_str());
                    throw Error("Cannot save molecule in IDT format - phosphate %s cannot be first monomer in sequence.", monomer.c_str());
                }
                // This is 'P' at one of the end
                if (seq_string.size() == 0) // First monomer
                {
                    seq_string += "/5Phos/";
                    modification = IdtModification::INTERNAL;
                }
                else
                {
                    seq_string += "/3Phos/";
                    modification = IdtModification::THREE_PRIME_END;
                }
                continue;
            }
            else if (monomer_class == MonomerClass::CHEM || monomer_class == MonomerClass::DNA || monomer_class == MonomerClass::RNA)
            {
                auto write_name = [&](const IdtAlias& idtAlias) -> bool {
                    bool has_modification = idtAlias.hasModification(modification);
                    if (has_modification || (possible_modification != modification && idtAlias.hasModification(possible_modification)))
                    {
                        const std::string& idt_alias =
                            has_modification ? idtAlias.getModification(modification) : idtAlias.getModification(possible_modification);
                        seq_string += '/';
                        seq_string += idt_alias;
                        seq_string += '/';
                        return true;
                    }
                    return false;
                };
                // Try to find in library
                const std::string& lib_monomer_id = _library.getMonomerTemplateIdByAlias(monomer_class, monomer);
                if (lib_monomer_id.size()) // Monomer in library
                {
                    if (write_name(_library.getMonomerTemplateById(lib_monomer_id).idtAlias()))
                    {
                        if (modification == IdtModification::FIVE_PRIME_END)
                            modification = IdtModification::INTERNAL;
                        continue;
                    }
                }

                // Check template for IdtAlias
                auto& monomer_template = doc.getMonomerTemplate(monomers.at(monomer_id)->templateId());
                if (write_name(monomer_template.idtAlias()))
                {
                    modification = IdtModification::INTERNAL;
                    continue;
                }
                else
                {
                    if (monomer_template.templateType() == KetBaseMonomerTemplate::TemplateType::MonomerTemplate &&
                        static_cast<const MonomerTemplate&>(monomer_template).unresolved())
                        throw Error("Unresolved monomer '%s' has no IDT alias.", monomer.c_str());
                    else if (monomer_class == MonomerClass::DNA || monomer_class == MonomerClass::RNA)
                        throw Error("Nucleotide '%s' has no IDT alias.", monomer.c_str());
                    else // CHEM
                        throw Error("Chem '%s' has no IDT alias.", monomer.c_str());
                }
            }
            else if (monomer_class != MonomerClass::Sugar)
            {
                throw Error("Cannot save molecule in IDT format - expected sugar but found %s monomer %s.",
                            MonomerTemplate::MonomerClassToStr(monomer_class).c_str(), monomer.c_str());
            }

            sugar = monomer;
            if (IDT_STANDARD_SUGARS.count(monomer) == 0)
                standard_sugar = false;

            bool variant_base = false;
            if (sequence.size() > 0)
            { // process base
                auto base_id = sequence.front();
                if (doc.getMonomerClass(base_id) == MonomerClass::Base)
                {
                    const auto& base_monomer = *monomers.at(base_id);
                    base = base_monomer.alias();
                    sequence.pop_front();
                    if (IDT_STANDARD_BASES.count(base) == 0 && STANDARD_MIXED_BASES.count(base) == 0)
                        standard_base = false;
                    if (base_monomer.monomerType() == KetBaseMonomer::MonomerType::AmbiguousMonomer)
                    {
                        variant_base = true;
                        std::string template_id = monomers.at(base_id)->templateId();
                        if (custom_variants.count(template_id) > 0)
                        {
                            base = custom_variants.at(template_id);
                        }
                        else
                        {
                            std::array<float, 4> ratios{0, 0, 0, 0};
                            bool has_ratio = false;
                            std::set<std::string> aliases;
                            std::string s_aliases;
                            const auto& ambiguous_template = doc.ambiguousTemplates().at(template_id);
                            if (ambiguous_template.subtype() != "mixture")
                                throw Error("Cannot save IDT - only mixture supported but found %s.", ambiguous_template.subtype().c_str());
                            for (auto& option : ambiguous_template.options())
                            {
                                auto& opt_alias = doc.templates().at(option.templateId()).getStringProp("alias");
                                aliases.emplace(opt_alias);
                                if (s_aliases.size() > 0)
                                    s_aliases += ", ";
                                s_aliases += opt_alias;
                                const auto& it = IDT_BASE_TO_RATIO_IDX.find(opt_alias);
                                if (it == IDT_BASE_TO_RATIO_IDX.end())
                                    throw Error("Cannot save IDT - unknown monomer template %s", opt_alias.c_str());
                                auto ratio = option.ratio();

                                if (ratio.has_value())
                                {
                                    ratios[it->second] = ratio.value();
                                    has_ratio = true;
                                }
                                else if (has_ratio)
                                    throw Error("Cannot save IDT - ambiguous monomer template '%s' use template '%s' without ratio.", base.c_str(),
                                                opt_alias.c_str());
                            }
                            if (STANDARD_MIXED_BASES_TO_ALIAS.count(aliases) == 0)
                                throw Error("Cannot save IDT - unknown mixture of monomers %s", s_aliases.c_str());
                            base = STANDARD_MIXED_BASES_TO_ALIAS.at(aliases);
                            if (RNA_DNA_MIXED_BASES.count(base) == 0)
                            {
                                if (base[0] == 'r')
                                {
                                    base.erase(base.begin());
                                    if (sugar != "R")
                                        throw Error("Cannot save IDT - RNA ambigous base connected to DNA sugar.");
                                }
                                else if (sugar == "R")
                                    throw Error("Cannot save IDT - DNA ambigous base connected to RNA sugar.");
                            }
                            if (has_ratio)
                            {
                                std::string base_short = '(' + base;
                                base_short += '1' + custom_amb_monomers++;
                                base = base_short;
                                base += ':';
                                // add ratios
                                for (auto r : ratios)
                                {
                                    int ir = static_cast<int>(std::round(r));
                                    std::string sr = std::to_string(ir);
                                    if (sr.size() < 2)
                                        sr = '0' + sr;
                                    base += sr;
                                }
                                base += ')';
                                base_short += ')';
                                custom_variants.emplace(template_id, base_short);
                            }
                            else
                                custom_variants.emplace(template_id, base);
                        }
                    }
                }
            }
            else
            {
                throw Error("Cannot save molecule in IDT format - sugar whithout base.");
            }

            if (sequence.size() > 0)
            { // process phosphate
                auto phosphate_id = sequence.front();
                sequence.pop_front();
                MonomerClass phosphate_class = doc.getMonomerClass(phosphate_id);
                if (phosphate_class != MonomerClass::Phosphate)
                    throw Error("Cannot save molecule in IDT format - phosphate expected between sugars but %s monomer %s found.",
                                MonomerTemplate::MonomerClassToStr(phosphate_class).c_str(), monomer.c_str());
                phosphate = monomers.at(phosphate_id)->alias();
                if (phosphate != "P" && phosphate != "sP")
                    standard_phosphate = false;
            }
            else
            {
                modification = IdtModification::THREE_PRIME_END;
                phosphate = "";
                standard_phosphate = true;
            }

            bool add_asterisk = false;
            if (phosphate == "sP")
            {
                phosphate = "P"; // Assume that modified monomers always contains P and modified to sP with *. TODO: confirm it with BA
                add_asterisk = true;
            }
            if ((standard_base || variant_base) && standard_phosphate && standard_sugar)
            {
                sugar = IDT_STANDARD_SUGARS.at(sugar);
                if (sugar.size())
                    seq_string += sugar;
                seq_string += base == "In" ? "I" : base; // Inosine coded as I in IDT
                if (sequence.size() == 0 && phosphate.size())
                {
                    if (phosphate != "P" || add_asterisk)
                        throw Error("Cannot save molecule in IDT format - phosphate %s cannot be last monomer in sequence.", monomer.c_str());
                    seq_string += "/3Phos/";
                }
            }
            else
            {
                // Try to find sugar,base,phosphate group template
                const std::string& sugar_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Sugar, sugar);
                const std::string& phosphate_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Phosphate, phosphate);
                std::string base_id;
                if (base.size())
                    base_id = _library.getMonomerTemplateIdByAlias(MonomerClass::Base, base);
                const std::string& idt_alias = _library.getIdtAliasByModification(modification, sugar_id, base_id, phosphate_id);
                if (idt_alias.size())
                {
                    seq_string += '/';
                    seq_string += idt_alias;
                    seq_string += '/';
                }
                else
                {
                    if (base.size())
                    {
                        if (phosphate.size())
                            throw Error("IDT alias for group sugar:%s base:%s phosphate:%s not found.", sugar.c_str(), base.c_str(), phosphate.c_str());
                        else
                            throw Error("IDT alias for group sugar:%s base:%s not found.", sugar.c_str(), base.c_str());
                    }
                    else
                    {
                        if (phosphate.size())

                            throw Error("IDT alias for group sugar:%s phosphate:%s not found.", sugar.c_str(), phosphate.c_str());
                        else
                            throw Error("IDT alias for sugar:%s not found.", sugar.c_str());
                    }
                }
            }

            if (add_asterisk)
            {
                seq_string += "*";
                phosphate = "sP";
            }

            if (modification == IdtModification::FIVE_PRIME_END)
                modification = IdtModification::INTERNAL;
        }
        if (seq_text.size() > 0)
            seq_text += "\n";
        seq_text += seq_string;
    }
}