void SmilesSaver::_saveMolecule()

in core/indigo-core/molecule/src/smiles_saver.cpp [82:704]


void SmilesSaver::_saveMolecule()
{
    _validate(*_bmol);

    int i, j, k;

    _ignored_vertices.clear_resize(_bmol->vertexEnd());
    _ignored_vertices.zerofill();
    if (ignore_hydrogens)
    {
        if (_qmol != 0)
            throw Error("ignore_hydrogens does not make sense for query molecules");

        for (i = _bmol->vertexBegin(); i < _bmol->vertexEnd(); i = _bmol->vertexNext(i))
            if (_mol->asMolecule().convertableToImplicitHydrogen(i))
                _ignored_vertices[i] = 1;
    }

    _checkRGroupsAndAttachmentPoints();
    _checkSRU();

    _touched_cistransbonds = 0;
    _complicated_cistrans.clear_resize(_bmol->edgeEnd());
    _complicated_cistrans.zerofill();
    _ban_slashes.clear_resize(_bmol->edgeEnd());
    _ban_slashes.zerofill();
    _cis_trans_parity.clear_resize(_bmol->edgeEnd());
    _cis_trans_parity.zerofill();
    _markCisTrans();

    _atoms.clear();
    while (_atoms.size() < _bmol->vertexEnd())
        _atoms.push(_neipool);

    _written_atoms.clear();
    _written_bonds.clear();
    _written_components = 0;

    _aromatic_bonds.clear();

    _hcount.clear_resize(_bmol->vertexEnd());
    _hcount_ignored.clear_resize(_bmol->vertexEnd());

    for (i = _bmol->vertexBegin(); i < _bmol->vertexEnd(); i = _bmol->vertexNext(i))
    {
        _hcount[i] = 0;
        if (_mol != 0)
        {
            if (!_mol->isPseudoAtom(i) && !_mol->isTemplateAtom(i) && !_mol->isRSite(i))
                _hcount[i] = _mol->getImplicitH_NoThrow(i, -1);
        }
        else
        {
            int atom_number = _bmol->getAtomNumber(i);
            int atom_charge = _bmol->getAtomCharge(i);
            _hcount[i] = MoleculeSavers::getHCount(*_bmol, i, atom_number, atom_charge);
        }
        _hcount_ignored[i] = 0;

        const Vertex& vertex = _bmol->getVertex(i);

        if (ignore_hydrogens)
        {
            if (_hcount[i] >= 0)
            {
                for (j = vertex.neiBegin(); j != vertex.neiEnd(); j = vertex.neiNext(j))
                {
                    int idx = vertex.neiVertex(j);
                    if (_bmol->getAtomNumber(idx) == ELEM_H && _bmol->getAtomIsotope(idx) == 0)
                        if (_ignored_vertices[idx])
                            _hcount_ignored[i]++;
                }
                _hcount[i] += _hcount_ignored[i];
            }
        }

        if (_bmol->getAtomAromaticity(i) == ATOM_AROMATIC)
        {
            _atoms[i].aromatic = true;
            // From the SMILES specification:
            // Please note that only atoms on the following list
            // can be considered aromatic: C, N, O, P, S, As, Se, and * (wildcard).
            static int allowed_lowercase[] = {ELEM_C, ELEM_N, ELEM_O, ELEM_P, ELEM_S, ELEM_Se, ELEM_As};
            if (_bmol->atomNumberBelongs(i, allowed_lowercase, NELEM(allowed_lowercase)))
                _atoms[i].lowercase = true;
        }
    }

    DfsWalk walk(*_bmol);

    walk.ignored_vertices = _ignored_vertices.ptr();
    walk.vertex_ranks = vertex_ranks;
    if (_bmol->sgroups.isPolimer())
        walk.vertex_classes = _polymer_indices.ptr();

    if (separate_rsites)
    {
        for (i = _bmol->vertexBegin(); i < _bmol->vertexEnd(); i = _bmol->vertexNext(i))
        {
            if (_bmol->isRSite(i))
                // We break the DFS walk when going through R-sites. For details, see
                // http://blueobelisk.shapado.com/questions/how-r-group-atoms-should-be-represented-in-smiles
                walk.mustBeRootVertex(i);
        }
    }

    walk.walk();

    const Array<DfsWalk::SeqElem>& v_seq = walk.getSequence();
    Array<int> v_to_comp_group;
    v_to_comp_group.resize(v_seq.size());
    v_to_comp_group.fffill();

    if (_qmol != nullptr && smarts_mode)
    {
        if (v_seq.size() < 1)
            return; // No atoms to save
        std::set<int> components;
        int cur_component = -1;
        for (i = 0; i < v_seq.size(); ++i && _qmol->components.size() > 0)
        {
            // In v_seq each fragment started with vertex which parent == -1
            // In SMARTS some fragments could be grouped (component-level grouping)
            // In QueryMolecule group number stored in "".components" member. GroupId == 0 means no group defined.
            // Each fragment - connected graph, so all vertexes should belong to one group.
            // All group fragments should go one by one - in SMARTS its inside "()".
            if (v_seq[i].parent_vertex < 0) // New Fragment
            {
                int new_component = 0;
                if (_qmol->components.size() > v_seq[i].idx)
                {
                    new_component = _qmol->components[v_seq[i].idx];
                    // if component defined for new fragment(id>0) and its different from previous and seen before
                    if (new_component > 0 && new_component != cur_component && components.count(new_component))
                    {
                        // According to the DfsWalk code, the groups components should be neighbors.
                        // If will be found case when it wrong - add code to rearrange fragments
                        throw Error("SMARTS fragments need to reaarange.");
                    }
                    components.emplace(new_component);
                }
                cur_component = new_component;
            }
            else
            {
                if (_qmol->components.size() > v_seq[i].idx && cur_component != _qmol->components[v_seq[i].idx])
                {
                    // Fragment contains atoms from different components - something went wrong
                    throw Error("Fragment contains atoms from different components.");
                }
            }
            v_to_comp_group[i] = cur_component;
        }
    }

    // fill up neighbor lists for the stereocenters calculation
    for (i = 0; i < v_seq.size(); i++)
    {
        int v_idx = v_seq[i].idx;
        int e_idx = v_seq[i].parent_edge;
        int v_prev_idx = v_seq[i].parent_vertex;

        _Atom& atom = _atoms[v_idx];

        if (e_idx >= 0)
        {
            if (walk.isClosure(e_idx))
            {
                for (k = atom.neighbors.begin(); k != atom.neighbors.end(); k = atom.neighbors.next(k))
                {
                    if (atom.neighbors[k] == -1)
                    {
                        atom.neighbors[k] = v_prev_idx;
                        break;
                    }
                }
                if (k == atom.neighbors.end())
                    throw Error("internal: can not put closing bond to its place");
            }
            else
            {
                atom.neighbors.add(v_prev_idx);
                atom.parent = v_prev_idx;
            }
            _atoms[v_prev_idx].neighbors.add(v_idx);
        }

        if (e_idx < 0 || !walk.isClosure(e_idx))
        {
            int openings = walk.numOpenings(v_idx);

            for (j = 0; j < openings; j++)
                atom.neighbors.add(-1);
        }

        // also, detect polymer borders
        if (_polymer_indices[v_idx] >= 0 && (v_prev_idx == -1 || _polymer_indices[v_prev_idx] != _polymer_indices[v_idx]))
            atom.starts_polymer = true;

        if (v_prev_idx >= 0 && _polymer_indices[v_prev_idx] >= 0 && _polymer_indices[v_prev_idx] != _polymer_indices[v_idx])
            _atoms[v_prev_idx].ends_polymer = true;
    }

    _written_atoms_inv.clear_resize(_bmol->vertexEnd());
    _written_atoms_inv.fffill();

    for (i = 0; i < v_seq.size(); i++)
    {
        int v_idx = v_seq[i].idx;
        int e_idx = v_seq[i].parent_edge;

        if (e_idx == -1 || !walk.isClosure(e_idx))
        {
            _written_atoms.push(v_idx);
            _written_atoms_inv[v_idx] = _written_atoms.size() - 1;
        }

        if (e_idx != -1)
            _written_bonds.push(e_idx);
    }

    // detect chiral configurations
    MoleculeStereocenters& stereocenters = _bmol->stereocenters;

    for (i = stereocenters.begin(); i != stereocenters.end(); i = stereocenters.next(i))
    {
        int atom_idx, type, group, pyramid[4];

        stereocenters.get(i, atom_idx, type, group, pyramid);

        if (type < MoleculeStereocenters::ATOM_AND || !stereocenters.isTetrahydral(atom_idx))
            continue;

        int implicit_h_idx = -1;

        if (pyramid[3] == -1)
            implicit_h_idx = 3;
        else
            for (j = 0; j < 4; j++)
                if (_ignored_vertices[pyramid[j]])
                {
                    implicit_h_idx = j;
                    break;
                }

        int pyramid_mapping[4];
        int counter = 0;

        _Atom& atom = _atoms[atom_idx];

        if (atom.parent != -1)
            for (k = 0; k < 4; k++)
                if (pyramid[k] == atom.parent)
                {
                    pyramid_mapping[counter++] = k;
                    break;
                }

        if (implicit_h_idx != -1)
            pyramid_mapping[counter++] = implicit_h_idx;

        for (j = atom.neighbors.begin(); j != atom.neighbors.end(); j = atom.neighbors.next(j))
        {
            if (atom.neighbors[j] == atom.parent)
                continue;

            for (k = 0; k < 4; k++)
                if (atom.neighbors[j] == pyramid[k])
                {
                    if (counter >= 4)
                        throw Error("internal: pyramid overflow");
                    pyramid_mapping[counter++] = k;
                    break;
                }
        }

        if (counter == 4)
        {
            // move the 'from' atom to the end
            counter = pyramid_mapping[0];
            pyramid_mapping[0] = pyramid_mapping[1];
            pyramid_mapping[1] = pyramid_mapping[2];
            pyramid_mapping[2] = pyramid_mapping[3];
            pyramid_mapping[3] = counter;
        }
        else if (counter != 3)
            throw Error("cannot calculate chirality");

        if (MoleculeStereocenters::isPyramidMappingRigid(pyramid_mapping))
            _atoms[atom_idx].chirality = 1;
        else
            _atoms[atom_idx].chirality = 2;
    }

    MoleculeAlleneStereo& allene_stereo = _bmol->allene_stereo;

    for (i = allene_stereo.begin(); i != allene_stereo.end(); i = allene_stereo.next(i))
    {
        int atom_idx, left, right, parity, subst[4], subst_map[4] = {-1, -1, -1, -1};

        allene_stereo.get(i, atom_idx, left, right, subst, parity);

        for (j = 0; j < 4; j++)
            if (subst[j] >= 0)
                subst_map[j] = _written_atoms_inv[subst[j]];

        // Daylight doc says: Hydrogens attached to substituted allene-like atoms
        // are taken to be immediately following that atom
        if (subst_map[0] < 0)
            subst_map[0] = _written_atoms_inv[left];
        else if (subst_map[1] < 0)
            subst_map[1] = _written_atoms_inv[left];

        if (subst_map[2] < 0)
            subst_map[2] = _written_atoms_inv[right];
        else if (subst_map[3] < 0)
            subst_map[3] = _written_atoms_inv[right];

        if (subst_map[0] < 0 || subst_map[1] < 0 || subst_map[2] < 0 || subst_map[3] < 0)
            throw Error("internal: allene subsituent not mapped");

        if (subst_map[1] < subst_map[0])
        {
            std::swap(subst_map[0], subst_map[1]);
            parity = 3 - parity;
        }
        if (subst_map[3] < subst_map[2])
        {
            std::swap(subst_map[2], subst_map[3]);
            parity = 3 - parity;
        }

        _atoms[atom_idx].chirality = 3 - parity;
    }

    if (canonize_chiralities)
    {
        QS_DEF(Array<int>, marked);
        QS_DEF(Array<int>, ids);
        stereocenters = _bmol->stereocenters;

        marked.clear_resize(_bmol->vertexEnd());
        marked.zerofill();

        for (i = 0; i < _written_atoms.size(); i++)
        {
            if (marked[i])
                continue;

            int idx = _written_atoms[i];

            if (_atoms[idx].chirality == 0 || !stereocenters.isTetrahydral(idx))
                continue;

            int type = stereocenters.getType(idx);

            if (type != MoleculeStereocenters::ATOM_AND && type != MoleculeStereocenters::ATOM_OR)
                continue;

            ids.clear();
            ids.push(idx);

            int group = stereocenters.getGroup(idx);

            for (j = i + 1; j < _written_atoms.size(); j++)
            {
                if (marked[j])
                    continue;

                int idx2 = _written_atoms[j];

                if (_atoms[idx2].chirality == 0)
                    continue;

                int type2 = stereocenters.getType(idx2);
                int group2 = stereocenters.getGroup(idx2);

                if (type2 == type && group2 == group)
                {
                    ids.push(idx2);
                    marked[j] = 1;
                }
            }

            if (_atoms[ids[0]].chirality == 1)
                for (j = 0; j < ids.size(); j++)
                    _atoms[ids[j]].chirality = 3 - _atoms[ids[j]].chirality;
        }
    }

    // write the SMILES itself

    // cycle_numbers[i] == -1 means that the number is available
    // cycle_numbers[i] == n means that the number is used by vertex n
    QS_DEF(Array<int>, cycle_numbers);

    int rsites_closures_starting_num = 91;
    int rbonds = _countRBonds() + _n_attachment_points;

    if (rbonds > 9)
        rsites_closures_starting_num = 99 - rbonds;

    cycle_numbers.clear();
    cycle_numbers.push(0); // never used

    bool first_component = true;

    for (i = 0; i < v_seq.size(); i++)
    {
        int v_idx = v_seq[i].idx;
        int e_idx = v_seq[i].parent_edge;
        int v_prev_idx = v_seq[i].parent_vertex;
        bool write_atom = true;

        if (v_prev_idx >= 0)
        {
            int branches = walk.numBranches(v_prev_idx);

            if (branches > 1)
                if (_atoms[v_prev_idx].branch_cnt > 0 && _atoms[v_prev_idx].paren_written)
                    _output.writeChar(')');
            /*
             * Fix IND-673 unused if-statement
             */
            //         if (v_prev_idx >= 0)
            //         {

            if (branches > 1)
                if (_atoms[v_prev_idx].branch_cnt < branches - 1)
                {
                    if (walk.isClosure(e_idx))
                        _atoms[v_prev_idx].paren_written = false;
                    else
                    {
                        _output.writeChar('(');
                        _atoms[v_prev_idx].paren_written = true;
                    }
                }

            _atoms[v_prev_idx].branch_cnt++;

            if (_atoms[v_prev_idx].branch_cnt > branches)
                throw Error("unexpected branch");
            /*
             * Fix IND-673 unused if-statement
             */
            //         }

            const Edge& edge = _bmol->getEdge(e_idx);
            bool bond_written = true;

            int dir = 0;
            int bond_order = _bmol->getBondOrder(e_idx);

            // SMARTS and KET loaders store directions in
            if (!smarts_mode || _qmol == nullptr) // || (original_format != ket && original_format != smarts))
                if (bond_order == BOND_SINGLE)
                    dir = _calcBondDirection(e_idx, v_prev_idx);

            if ((dir == 1 && v_idx == edge.end) || (dir == 2 && v_idx == edge.beg))
                _output.writeChar('/');
            else if ((dir == 2 && v_idx == edge.end) || (dir == 1 && v_idx == edge.beg))
                _output.writeChar('\\');
            else if (smarts_mode && _qmol != 0)
                QueryMolecule::writeSmartsBond(_output, &_qmol->getBond(e_idx), false);
            else if (bond_order == BOND_DOUBLE)
                _output.writeChar('=');
            else if (bond_order == BOND_TRIPLE)
                _output.writeChar('#');
            else if (bond_order == BOND_AROMATIC && _shouldWriteAromaticBond(e_idx))
                _output.writeChar(':');
            else if (bond_order == BOND_SINGLE && _atoms[edge.beg].aromatic && _atoms[edge.end].aromatic)
                _output.writeChar('-');
            else
                bond_written = false;

            if (walk.isClosure(e_idx))
            {
                for (j = 1; j < cycle_numbers.size(); j++)
                    if (cycle_numbers[j] == v_idx)
                        break;

                if (j == cycle_numbers.size())
                    throw Error("cycle number not found");

                _writeCycleNumber(j);

                cycle_numbers[j] = -1;
                write_atom = false;
            }
        }
        else
        {
            if (!first_component)
            {
                // group == 0 means no group set.
                int prev_group = v_to_comp_group[i - 1];
                int new_group = v_to_comp_group[i];
                bool different_groups = new_group != prev_group;
                if (smarts_mode && prev_group && different_groups) // if component group ended
                    _output.writeChar(')');

                _output.writeChar('.');

                if (smarts_mode && new_group && different_groups) // if new group started
                    _output.writeChar('(');
            }
            else
            {
                if (smarts_mode && v_to_comp_group[i] > 0) // component level grouping set for this fragment
                    _output.writeChar('(');
                first_component = false;
            }
            _written_components++;
        }
        if (write_atom)
        {
            if (!smarts_mode)
                _writeAtom(v_idx, _atoms[v_idx].aromatic, _atoms[v_idx].lowercase, _atoms[v_idx].chirality);
            else if (_qmol != 0)
            {
                int aam = _bmol->reaction_atom_mapping[v_idx];
                QueryMolecule::writeSmartsAtom(_output, &_qmol->getAtom(v_idx), aam, _atoms[v_idx].chirality, 0, false, false, _qmol->original_format);
            }
            else
                throw Error("SMARTS format available for query only!");

            QS_DEF(Array<int>, closing);

            walk.getNeighborsClosing(v_idx, closing);

            for (int ap = 1; ap <= _bmol->attachmentPointCount(); ap++)
            {
                int idx = 0, atom_idx;

                for (idx = 0; (atom_idx = _bmol->getAttachmentPoint(ap, idx)) != -1; idx++)
                    if (atom_idx == v_idx)
                    {
                        // Here is the problem: if we have an attachment point, the resulting
                        // SMILES is supposed to contain an extra atom not present in the given
                        // molecule. For example, chlorine with an attachment point will
                        // become Cl%91.[*:1]%91 |;_AP1| (two atoms).
                        // We can not modify the given molecule, but we want the closure to
                        // be here. To achieve that, we add a link to an imagimary atom with
                        // incredibly big number.
                        closing.push(10000 + ap);
                    }
            }

            for (j = 0; j < closing.size(); j++)
            {
                bool ap = (closing[j] >= 10000);
                bool rsite = !ap && (separate_rsites && _bmol->isRSite(closing[j]));

                if (ap || rsite)
                {
                    cycle_numbers.expandFill(rsites_closures_starting_num + 1, -1);
                    for (k = rsites_closures_starting_num; k < cycle_numbers.size(); k++)
                        if (cycle_numbers[k] == -1)
                            break;
                }
                else
                {
                    for (k = 1; k < cycle_numbers.size(); k++)
                        if (cycle_numbers[k] == -1)
                            break;
                }

                if (ap)
                {
                    _attachment_indices.push(closing[j] - 10000);
                    _attachment_cycle_numbers.push(k);
                }

                if (k == cycle_numbers.size())
                    cycle_numbers.push(v_idx);
                else
                    cycle_numbers[k] = v_idx;

                _writeCycleNumber(k);
            }

            if (_atoms[v_idx].starts_polymer)
                _output.writeString("{-}");
            if (_atoms[v_idx].ends_polymer)
                _output.writeString("{+n}");
        }
    }
    if (smarts_mode && v_to_comp_group[i - 1] > 0) // if group set for last fragment - add finish )
        _output.writeChar(')');

    if (write_extra_info && chemaxon && !smarts_mode) // no extended block in SMARTS
    {
        // Before we write the |...| block (ChemAxon's Extended SMILES),
        // we must clean up the mess we did with the attachment points
        // (see big comment above). That is, we append separated atoms,
        // not present in the original molecule, to the end, and connect
        // them with the "cycle" closure to the real atoms that are the
        // attachment points.
        for (i = 0; i < _attachment_indices.size(); i++)
        {
            _output.printf(".[*:%d]", _attachment_indices[i]);
            _writeCycleNumber(_attachment_cycle_numbers[i]);
        }

        _comma = false;
        _writeRingCisTrans();
        _writeStereogroups();
        _writeRadicals();
        _writePseudoAtoms();
        _writeHighlighting();
        _writeRGroups();
        _writeSGroups();
        _writeRingBonds();
        _writeUnsaturated();
        _writeSubstitutionCounts();
        if (_bmol->hasAtropoStereoBonds())
            _writeWedges();

        if (_comma)
            _output.writeChar('|');
    }
}