void MolfileSaver::_writeCtab()

in core/indigo-core/molecule/src/molfile_saver.cpp [446:1076]


/**
 * Writes the V3000 connection table of a molecule, followed by its R-group
 * and template definitions.
 *
 * The emitted sections are, in order: CTAB header and COUNTS line, ATOM block,
 * BOND block, optional COLLECTION block (stereo groups, highlighting, custom
 * collections), optional SGROUP block, CTAB footer, one R-group section per
 * R-group that has fragments, and an optional TEMPLATE block.
 *
 * Side effects:
 *  - _atom_mapping and _bond_mapping are rebuilt so that they translate internal
 *    vertex/edge indices into the 1-based numbers written to the file.
 *  - _handleCIP() and, when template groups exist, _handleMonomers() are invoked
 *    on the molecule before anything is written.
 *  - When add_implicit_h is set and the molecule is not a query molecule,
 *    temporary data S-groups carrying implicit hydrogen counts are added to the
 *    molecule and removed again after the SGROUP block has been written.
 *
 * @param output destination stream.
 * @param mol    molecule to serialize; temporarily modified as described above.
 * @param query  when true, mol is treated as a QueryMolecule and query-only
 *               atom/bond properties are written.
 * @throws Error if an atom cannot be represented ("unsupported query feature")
 *               or a bond has an unrepresentable (negative) order.
 */
void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query)
{
    _handleCIP(mol);
    if (mol.tgroups.getTGroupCount())
        _handleMonomers(mol);

    QueryMolecule* qmol = 0;

    // The caller guarantees via `query` that mol really is a QueryMolecule.
    if (query)
        qmol = (QueryMolecule*)(&mol);

    int i;
    int iw = 1;
    QS_DEF(Array<char>, buf);
    // Indices of the temporary data S-groups created below; handed to
    // _removeImplicitSGroups() once the SGROUP block has been written.
    std::list<int> implicit_sgroups_indexes;

    _atom_mapping.clear_resize(mol.vertexEnd());
    _bond_mapping.clear_resize(mol.edgeEnd());

    // Internal vertex indices may have gaps; file atom numbers are consecutive from 1.
    for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i), iw++)
        _atom_mapping[i] = iw;

    if (add_implicit_h && qmol == 0)
    {
        // Store implicit hydrogen counts as temporary data S-groups so that they
        // are counted in COUNTS and written together with the regular S-groups.
        for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
        {
            int atom_number = mol.getAtomNumber(i);
            int charge = mol.getAtomCharge(i);
            int hcount = MoleculeSavers::getHCount(mol, i, atom_number, charge);
            if (Molecule::shouldWriteHCount(mol.asMolecule(), i) && hcount > 0)
            {
                int sg_idx = mol.sgroups.addSGroup(SGroup::SG_TYPE_DAT);
                implicit_sgroups_indexes.push_front(sg_idx);
                DataSGroup& sgroup = static_cast<DataSGroup&>(mol.sgroups.getSGroup(sg_idx));
                sgroup.setMrv_implicit(i, hcount);
            }
        }
    }

    output.writeStringCR("M  V30 BEGIN CTAB");
    // The S-group count already includes the temporary implicit-H S-groups added above.
    output.printfCR("M  V30 COUNTS %d %d %d 0 0", mol.vertexCount(), mol.edgeCount(), mol.countSGroups());
    output.writeStringCR("M  V30 BEGIN ATOM");

    // ---------------------------------------------------------------- ATOM block
    // NOTE(review): `coords` is shared by all iterations; this relies on
    // convert_xyz_to_string() resetting the stream on each call - confirm.
    std::stringstream coords;
    for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i))
    {
        int atom_number = mol.getAtomNumber(i);
        int isotope = mol.getAtomIsotope(i);
        ArrayOutput out(buf);

        out.printf("%d ", _atom_mapping[i]);
        // Filled by parseQueryAtomSmarts() for query atoms; only `list` is used below.
        std::vector<std::unique_ptr<QueryMolecule::Atom>> list;
        std::map<int, std::unique_ptr<QueryMolecule::Atom>> properties;
        int query_atom_type;

        // Atom label. Deuterium and tritium get dedicated symbols, so their
        // isotope is reset to suppress a redundant MASS= property.
        if (atom_number == ELEM_H && isotope == DEUTERIUM)
        {
            out.writeChar('D');
            isotope = 0;
        }
        else if (atom_number == ELEM_H && isotope == TRITIUM)
        {
            out.writeChar('T');
            isotope = 0;
        }
        else if (mol.isPseudoAtom(i))
            out.writeString(mol.getPseudoAtom(i));
        else if (mol.isTemplateAtom(i))
            out.writeString(mol.getTemplateAtom(i));
        else if (mol.isRSite(i))
            out.writeString("R#");
        else if (atom_number > 0)
        {
            _writeAtomLabel(out, atom_number);
        }
        else if (qmol != 0 && (query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, i, list, properties)) != -1)
        {
            if (query_atom_type == QueryMolecule::QUERY_ATOM_A)
                out.writeChar('A');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_Q)
                out.writeChar('Q');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_X)
                out.writeChar('X');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_M)
                out.writeChar('M');
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_AH)
                out.writeString("AH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_QH)
                out.writeString("QH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_XH)
                out.writeString("XH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_MH)
                out.writeString("MH");
            else if (query_atom_type == QueryMolecule::QUERY_ATOM_LIST || query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST)
            {
                // Atom list: "[A,B,...]" or "NOT[A,B,...]".
                if (query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST)
                    out.writeString("NOT");

                out.writeChar('[');

                bool not_first = false;
                for (auto& qatom : list)
                {
                    if (not_first)
                        out.writeChar(',');
                    else
                        not_first = true;

                    if (qatom->type == QueryMolecule::ATOM_NUMBER)
                        _writeAtomLabel(out, qatom->value_max);
                    else if (qatom->type == QueryMolecule::ATOM_PSEUDO)
                        out.writeString(qatom->alias.ptr());
                }
                out.writeChar(']');
            }
        }
        else if (atom_number == -1)
            out.writeChar('A');
        else
            throw Error("molfile 3000: can not save atom %d because of unsupported "
                        "query feature",
                        i);

        int aam = 0, ecflag = 0, irflag = 0;

        aam = mol.reaction_atom_mapping[i];
        irflag = mol.reaction_atom_inversion[i];
        ecflag = mol.reaction_atom_exact_change[i];

        Vec3f& xyz = mol.getAtomXyz(i);
        int charge = mol.getAtomCharge(i);
        int radical = 0;
        int valence = mol.getExplicitValence(i);
        int stereo_parity = _getStereocenterParity(mol, i);

        // Radicals are only queried for ordinary atoms.
        if (!mol.isRSite(i) && !mol.isPseudoAtom(i) && !mol.isTemplateAtom(i))
            radical = mol.getAtomRadical_NoThrow(i, 0);

        /*
         * Trailing zeros workaround
         */
        convert_xyz_to_string(xyz, coords);

        out.printf(" %s %d", coords.str().c_str(), aam);

        // Query molecules write every known charge (including 0); ordinary
        // molecules only write non-zero charges.
        if ((mol.isQueryMolecule() && charge != CHARGE_UNKNOWN) || (!mol.isQueryMolecule() && charge != 0))
            out.printf(" CHG=%d", charge);

        if (qmol != 0)
        {
            // A hydrogen count of exactly zero is written as -1; negative
            // results from getHCount() produce no HCOUNT property.
            int hcount = MoleculeSavers::getHCount(mol, i, atom_number, charge);
            if (hcount > 0)
                out.printf(" HCOUNT=%d", hcount);
            else if (hcount == 0)
                out.printf(" HCOUNT=-1");
        }

        if (radical > 0)
            out.printf(" RAD=%d", radical);
        if (stereo_parity > 0)
            out.printf(" CFG=%d", stereo_parity);
        if (isotope > 0)
            out.printf(" MASS=%d", isotope);
        // An explicit valence of zero is written as -1.
        if (valence > 0)
            out.printf(" VAL=%d", valence);
        if (valence == 0)
            out.printf(" VAL=-1");
        if (irflag > 0)
            out.printf(" INVRET=%d", irflag);
        if (ecflag > 0)
            out.printf(" EXACHG=%d", ecflag);

        if (mol.isRSite(i))
        {
            int k;

            QS_DEF(Array<int>, rg_list);
            mol.getAllowedRGroups(i, rg_list);

            if (rg_list.size() > 0)
            {
                out.printf(" RGROUPS=(%d", rg_list.size());
                for (k = 0; k < rg_list.size(); k++)
                    out.printf(" %d", rg_list[k]);
                out.writeChar(')');

                // The attachment order is written explicitly only when
                // _checkAttPointOrder() reports that it is needed.
                if (!_checkAttPointOrder(mol, i))
                {
                    const Vertex& vertex = mol.getVertex(i);

                    // The leading number is the count of values that follow
                    // (one atom number and one order per neighbor).
                    out.printf(" ATTCHORD=(%d", vertex.degree() * 2);
                    for (k = 0; k < vertex.degree(); k++)
                        out.printf(" %d %d", _atom_mapping[mol.getRSiteAttachmentPointByOrder(i, k)], k + 1);

                    out.writeChar(')');
                }
            }
        }

        if (mol.isTemplateAtom(i))
        {
            std::string tclass;
            if (mol.getTemplateAtomClass(i) != 0 && strlen(mol.getTemplateAtomClass(i)) > 0)
            {
                tclass = mol.getTemplateAtomClass(i);
                // convert CHEM to LINKER for BIOVIA
                out.printf(" CLASS=%s", tclass == kMonomerClassCHEM ? kMonomerClassLINKER : tclass.c_str());
            }

            if (mol.getTemplateAtomSeqid(i) != -1 && tclass != kMonomerClassCHEM) // No SEQID for chem
                out.printf(" SEQID=%d", mol.getTemplateAtomSeqid(i));

            // if (mol.getTemplateAtomSeqName(i) && strlen(mol.getTemplateAtomSeqName(i)))
            //    out.printf(" SEQNAME=%s", mol.getTemplateAtomSeqName(i));

            if (mol.template_attachment_points.size() > 0)
            {
                int ap_count = mol.getTemplateAtomAttachmentPointsCount(i);
                if (ap_count)
                {
                    // Pairs of (neighbor atom number, attachment point id) for
                    // every attachment point that belongs to this template atom.
                    out.printf(" ATTCHORD=(%d", ap_count * 2);
                    for (int j = mol.template_attachment_points.begin(); j != mol.template_attachment_points.end(); j = mol.template_attachment_points.next(j))
                    {
                        BaseMolecule::TemplateAttPoint& ap = mol.template_attachment_points.at(j);
                        if (ap.ap_occur_idx == i)
                        {
                            out.printf(" %d %s", _atom_mapping[ap.ap_aidx], ap.ap_id.ptr());
                        }
                    }
                    out.printf(")");
                }
            }
        }

        if (mol.attachmentPointCount() > 0)
        {
            // Bitmask of the attachment point orders this atom takes part in
            // (bit idx-1 is set for order idx).
            int val = 0;

            for (int idx = 1; idx <= mol.attachmentPointCount(); idx++)
            {
                for (int j = 0; mol.getAttachmentPoint(idx, j) != -1; j++)
                    if (mol.getAttachmentPoint(idx, j) == i)
                    {
                        val |= 1 << (idx - 1);
                        break;
                    }
            }

            // A mask of 3 (orders 1 and 2 both set) is written as -1.
            if (val > 0)
                out.printf(" ATTCHPT=%d", val == 3 ? -1 : val);
        }

        if (qmol != 0)
        {
            // Query-only atom constraints.
            int unsat;
            if (qmol->getAtom(i).sureValue(QueryMolecule::ATOM_UNSATURATION, unsat))
                out.printf(" UNSAT=1");
            int subst;
            if (MoleculeSavers::getSubstitutionCountFlagValue(*qmol, i, subst))
                out.printf(" SUBST=%d", subst);
            int rbc;
            if (MoleculeSavers::getRingBondCountFlagValue(*qmol, i, rbc))
                out.printf(" RBCNT=%d", rbc > MAX_RING_BOND_COUNT ? MAX_RING_BOND_COUNT : rbc);
        }

        _writeMultiString(output, buf.ptr(), buf.size());
    }

    output.writeStringCR("M  V30 END ATOM");
    output.writeStringCR("M  V30 BEGIN BOND");

    // ---------------------------------------------------------------- BOND block
    iw = 1;

    for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i), iw++)
    {
        const Edge& edge = mol.getEdge(i);
        int bond_order = mol.getBondOrder(i);
        ArrayOutput out(buf);

        _bond_mapping[i] = iw;

        // A negative order on a query bond may still be one of the query bond
        // types that the format can express.
        if (bond_order < 0 && qmol != 0)
        {
            int qb = QueryMolecule::getQueryBondType(qmol->getBond(i));

            if (qb == _BOND_SINGLE_OR_DOUBLE || qb == _BOND_SINGLE_OR_AROMATIC || qb == _BOND_DOUBLE_OR_AROMATIC || qb == _BOND_ANY)
                bond_order = qb;
        }

        if (bond_order < 0)
            throw Error("unrepresentable query bond");

        // Zero-order bonds are written as hydrogen bonds when either end is a
        // hydrogen atom and as coordination bonds otherwise.
        if (bond_order == BOND_ZERO)
        {
            bond_order = _BOND_COORDINATION;
            if ((mol.getAtomNumber(edge.beg) == ELEM_H) || (mol.getAtomNumber(edge.end) == ELEM_H))
                bond_order = _BOND_HYDROGEN;
        }

        out.printf("%d %d %d %d", iw, bond_order, _atom_mapping[edge.beg], _atom_mapping[edge.end]);

        int direction = mol.getBondDirection(i);

        switch (direction)
        {
        case BOND_UP:
            out.printf(" CFG=1");
            break;
        case BOND_EITHER:
            out.printf(" CFG=2");
            break;
        case BOND_DOWN:
            out.printf(" CFG=3");
            break;
        case 0:
            // No direction: a bond whose cis-trans configuration is ignored is
            // written as CFG=2 unless a neighboring bond is already "either".
            if (mol.cis_trans.isIgnored(i))
                if (!_hasNeighborEitherBond(mol, i))
                    out.printf(" CFG=2");
            break;
        }

        int reacting_center = 0;
        reacting_center = mol.reaction_bond_reacting_center[i];

        if (reacting_center != 0)
            out.printf(" RXCTR=%d", reacting_center);

        int indigo_topology = -1;
        if (qmol != 0)
            qmol->getBond(i).sureValue(QueryMolecule::BOND_TOPOLOGY, indigo_topology);

        // TOPO=1 for ring bonds, TOPO=2 for chain bonds, nothing otherwise.
        int topology = 0;
        if (indigo_topology == TOPOLOGY_RING)
            topology = 1;
        else if (indigo_topology == TOPOLOGY_CHAIN)
            topology = 2;

        if (topology != 0)
            out.printf(" TOPO=%d", topology);

        _writeMultiString(output, buf.ptr(), buf.size());
    }

    output.writeStringCR("M  V30 END BOND");

    // ---------------------------------------------------------- COLLECTION block
    MoleculeStereocenters& stereocenters = mol.stereocenters;

    // Custom collections are written inside this block as well, so their presence
    // alone must be enough to open it; otherwise they would be silently dropped
    // for molecules that have neither stereocenters nor highlighting.
    const bool has_custom_collections = mol.custom_collections.size() > 0;

    if (stereocenters.begin() != stereocenters.end() || mol.hasHighlighting() || has_custom_collections)
    {
        output.writeStringCR("M  V30 BEGIN COLLECTION");

        // Marks atoms that were already written as part of an earlier atom's group.
        QS_DEF(Array<int>, processed);

        processed.clear_resize(mol.vertexEnd());
        processed.zerofill();

        for (i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
        {
            if (processed[i])
                continue;

            ArrayOutput out(buf);
            int j, type = stereocenters.getType(i);

            if (type == MoleculeStereocenters::ATOM_ABS)
                out.writeString("MDLV30/STEABS ATOMS=(");
            else if (type == MoleculeStereocenters::ATOM_OR)
                out.printf("MDLV30/STEREL%d ATOMS=(", stereocenters.getGroup(i));
            else if (type == MoleculeStereocenters::ATOM_AND)
                out.printf("MDLV30/STERAC%d ATOMS=(", stereocenters.getGroup(i));
            else
                continue;

            QS_DEF(Array<int>, list);

            list.clear();
            list.push(i);

            // Gather all later atoms of the same stereo group onto this line.
            for (j = mol.vertexNext(i); j < mol.vertexEnd(); j = mol.vertexNext(j))
                if (stereocenters.sameGroup(i, j))
                {
                    list.push(j);
                    processed[j] = 1;
                }

            out.printf("%d", list.size());
            for (j = 0; j < list.size(); j++)
                out.printf(" %d", _atom_mapping[list[j]]);
            out.writeChar(')');

            _writeMultiString(output, buf.ptr(), buf.size());
        }

        if (mol.hasHighlighting())
        {
            if (mol.countHighlightedBonds() > 0)
            {
                ArrayOutput out(buf);

                out.printf("MDLV30/HILITE BONDS=(%d", mol.countHighlightedBonds());

                for (i = mol.edgeBegin(); i != mol.edgeEnd(); i = mol.edgeNext(i))
                    if (mol.isBondHighlighted(i))
                        out.printf(" %d", _bond_mapping[i]);
                out.writeChar(')');

                _writeMultiString(output, buf.ptr(), buf.size());
            }
            if (mol.countHighlightedAtoms() > 0)
            {
                ArrayOutput out(buf);
                out.printf("MDLV30/HILITE ATOMS=(%d", mol.countHighlightedAtoms());
                for (i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
                    if (mol.isAtomHighlighted(i))
                        out.printf(" %d", _atom_mapping[i]);
                out.writeChar(')');

                _writeMultiString(output, buf.ptr(), buf.size());
            }
        }
        if (has_custom_collections)
        {
            // Custom collection lines are written back verbatim.
            for (i = mol.custom_collections.begin(); i != mol.custom_collections.end(); i = mol.custom_collections.next(i))
            {
                ArrayOutput out(buf);
                out.printf("%s", mol.custom_collections.at(i));
                _writeMultiString(output, buf.ptr(), buf.size());
            }
        }

        output.writeStringCR("M  V30 END COLLECTION");
    }

    // -------------------------------------------------------------- SGROUP block
    // S-group indices in the order in which they have to be written.
    QS_DEF(Array<int>, sgs_sorted);
    _checkSGroupIndices(mol, sgs_sorted);

    if (mol.countSGroups() > 0)
    {
        MoleculeSGroups* sgroups = &mol.sgroups;
        int idx = 1;

        output.writeStringCR("M  V30 BEGIN SGROUP");
        for (i = 0; i < sgs_sorted.size(); i++)
        {
            ArrayOutput out(buf);
            int sg_idx = sgs_sorted[i];
            SGroup& sgroup = sgroups->getSGroup(sg_idx);
            // The common part is written first; type-specific properties are appended below.
            _writeGenericSGroup3000(sgroup, idx++, out);
            if (sgroup.sgroup_type == SGroup::SG_TYPE_GEN)
            {
                _writeMultiString(output, buf.ptr(), buf.size());
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_SUP)
            {
                Superatom& sup = static_cast<Superatom&>(sgroup);
                if (sup.bond_connections.size() > 0)
                {
                    for (int j = 0; j < sup.bond_connections.size(); j++)
                    {
                        // Bond number followed by the bond direction vector; z is always written as 0.
                        out.printf(" CSTATE=(4 %d %f %f %f)", _bond_mapping[sup.bond_connections[j].bond_idx], sup.bond_connections[j].bond_dir.x,
                                   sup.bond_connections[j].bond_dir.y, 0.f);
                    }
                }
                if (sup.subscript.size() > 1)
                {
                    // Labels containing a space have to be quoted.
                    if (sup.subscript.find(' ') > -1)
                        out.printf(" LABEL=\"%s\"", sup.subscript.ptr());
                    else
                        out.printf(" LABEL=%s", sup.subscript.ptr());
                }
                // convert CHEM to LINKER for BIOVIA
                if (sup.sa_class.size() > 1)
                    out.printf(" CLASS=%s", sup.sa_class.ptr() == std::string(kMonomerClassCHEM) ? kMonomerClassLINKER : sup.sa_class.ptr());
                if (sup.contracted == DisplayOption::Expanded)
                    out.printf(" ESTATE=E");
                if (sup.attachment_points.size() > 0)
                {
                    for (int j = sup.attachment_points.begin(); j < sup.attachment_points.end(); j = sup.attachment_points.next(j))
                    {
                        // A missing leaving atom (lvidx < 0) is written as atom number 0.
                        int leave_idx = 0;
                        if (sup.attachment_points[j].lvidx > -1)
                            leave_idx = _atom_mapping[sup.attachment_points[j].lvidx];

                        out.printf(" SAP=(3 %d %d %s)", _atom_mapping[sup.attachment_points[j].aidx], leave_idx, sup.attachment_points[j].apid.ptr());
                    }
                }
                if (sup.seqid > 0)
                    out.printf(" SEQID=%d", sup.seqid);

                if (sup.sa_natreplace.size() > 1)
                    out.printf(" NATREPLACE=%s", sup.sa_natreplace.ptr());

                _writeMultiString(output, buf.ptr(), buf.size());
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_DAT)
            {
                DataSGroup& dsg = static_cast<DataSGroup&>(sgroup);

                // Each of the following text fields is written only when
                // non-empty and is quoted when it contains a space.
                const char* name = dsg.name.ptr();
                if (name != 0 && strlen(name) > 0)
                {
                    out.writeString(" FIELDNAME=");
                    bool space_found = (strchr(name, ' ') != NULL);
                    if (space_found)
                        out.writeString("\"");
                    out.writeString(name);
                    if (space_found)
                        out.writeString("\"");
                }
                const char* desc = dsg.description.ptr();
                if (desc != 0 && strlen(desc) > 0)
                {
                    out.writeString(" FIELDINFO=");
                    bool space_found = (strchr(desc, ' ') != NULL);
                    if (space_found)
                        out.writeString("\"");
                    out.writeString(desc);
                    if (space_found)
                        out.writeString("\"");
                }
                const char* querycode = dsg.querycode.ptr();
                if (querycode != 0 && strlen(querycode) > 0)
                {
                    out.writeString(" QUERYTYPE=");
                    bool space_found = (strchr(querycode, ' ') != NULL);
                    if (space_found)
                        out.writeString("\"");
                    out.writeString(querycode);
                    if (space_found)
                        out.writeString("\"");
                }
                const char* queryoper = dsg.queryoper.ptr();
                if (queryoper != 0 && strlen(queryoper) > 0)
                {
                    out.writeString(" QUERYOP=");
                    bool space_found = (strchr(queryoper, ' ') != NULL);
                    if (space_found)
                        out.writeString("\"");
                    out.writeString(queryoper);
                    if (space_found)
                        out.writeString("\"");
                }

                out.printf(" FIELDDISP=\"");
                _writeDataSGroupDisplay(dsg, out);
                out.printf("\"");
                if (dsg.data.size() > 0 && dsg.data[0] != 0)
                {
                    // Split field data by new lines
                    // (one FIELDDATA entry per line; the loop expects the buffer
                    // to end with a NUL terminator)
                    int len = dsg.data.size();
                    char* data = dsg.data.ptr();
                    while (len > 0)
                    {
                        int j;
                        // Stops one short of the end: the last byte is expected
                        // to be the terminator.
                        for (j = 0; j < len - 1; j++)
                            if (data[j] == '\n')
                                break;

                        out.printf(" FIELDDATA=\"%.*s\"", j, data);
                        // Skip the newline that ended this line.
                        if (data[j] == '\n')
                            j++;

                        data += j;
                        len -= j;

                        // Stop when the buffer is exhausted (checked before
                        // dereferencing), when no progress was made - which
                        // happens only for data lacking a NUL terminator and
                        // would otherwise loop forever - or at the terminator.
                        if (len <= 0 || j == 0 || *data == 0)
                            break;
                    }
                }
                _writeMultiString(output, buf.ptr(), buf.size());
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_SRU)
            {
                RepeatingUnit& ru = static_cast<RepeatingUnit&>(sgroup);
                // Any connectivity other than head-to-head / head-to-tail is written as EU.
                if (ru.connectivity == SGroup::HEAD_TO_HEAD)
                    out.printf(" CONNECT=HH");
                else if (ru.connectivity == SGroup::HEAD_TO_TAIL)
                    out.printf(" CONNECT=HT");
                else
                    out.printf(" CONNECT=EU");
                if (ru.subscript.size() > 1)
                {
                    if (ru.subscript.find(' ') > -1)
                        out.printf(" LABEL=\"%s\"", ru.subscript.ptr());
                    else
                        out.printf(" LABEL=%s", ru.subscript.ptr());
                }
                _writeMultiString(output, buf.ptr(), buf.size());
            }
            else if (sgroup.sgroup_type == SGroup::SG_TYPE_MUL)
            {
                MultipleGroup& mg = static_cast<MultipleGroup&>(sgroup);
                if (mg.parent_atoms.size() > 0)
                {
                    out.printf(" PATOMS=(%d", mg.parent_atoms.size());
                    int j;
                    for (j = 0; j < mg.parent_atoms.size(); j++)
                        out.printf(" %d", _atom_mapping[mg.parent_atoms[j]]);
                    out.printf(")");
                }
                out.printf(" MULT=%d", mg.multiplier);
                _writeMultiString(output, buf.ptr(), buf.size());
            }
            else
            {
                // Remaining S-group types have only the generic part.
                _writeMultiString(output, buf.ptr(), buf.size());
            }
        }
        output.writeStringCR("M  V30 END SGROUP");
        // Drop the temporary implicit-H S-groups added at the top of the function.
        // NOTE(review): if an exception is thrown earlier, this cleanup is
        // skipped and the temporary S-groups stay in mol.
        _removeImplicitSGroups(mol, implicit_sgroups_indexes);
    }

    output.writeStringCR("M  V30 END CTAB");

    // ------------------------------------------------------ R-groups and templates
    // R-groups are numbered from 1; those without fragments are skipped.
    int n_rgroups = mol.rgroups.getRGroupCount();
    for (i = 1; i <= n_rgroups; i++)
        if (mol.rgroups.getRGroup(i).fragments.size() > 0)
            _writeRGroup(output, mol, i);

    int n_tgroups = mol.tgroups.getTGroupCount();
    if (n_tgroups > 0)
    {
        output.writeStringCR("M  V30 BEGIN TEMPLATE");

        for (i = mol.tgroups.begin(); i != mol.tgroups.end(); i = mol.tgroups.next(i))
        {
            _writeTGroup(output, mol, i);
        }
        output.writeStringCR("M  V30 END TEMPLATE");
    }
}