void SmilesLoader::_readOtherStuff()

in core/indigo-core/molecule/src/smiles_loader.cpp [288:1338]


void SmilesLoader::_readOtherStuff()
{
    MoleculeCisTrans& cis_trans = _bmol->cis_trans;

    QS_DEF(Array<int>, to_remove);

    std::unordered_set<int> _overtly_defined_abs;

    to_remove.clear();

    while (1)
    {
        char c = _scanner.readChar();

        if (c == '|')
            break;

        if (c == 'w') // 'ANY' stereocenters
        {
            char wmode = 0;
            if (_scanner.lookNext() == 'U')
                wmode = 'U';
            if (_scanner.lookNext() == 'D')
                wmode = 'D';

            if (wmode)
                _scanner.skip(1);

            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'w%c'", wmode);

            while (isdigit(_scanner.lookNext()))
            {
                int atom_idx = _scanner.readUnsigned();
                // handle wiggly bonds
                if (!wmode)
                {
                    // This either bond can mark stereocenter or cis-trans double bond
                    // For example CC=CN |w:1.0|
                    const Vertex& v = _bmol->getVertex(atom_idx);
                    bool found = false;
                    for (int nei : v.neighbors())
                    {
                        int edge_idx = v.neiEdge(nei);
                        if (_bmol->getBondOrder(edge_idx) == BOND_DOUBLE && _bmol->getBondTopology(edge_idx) != TOPOLOGY_RING)
                        {
                            cis_trans.ignore(edge_idx);
                            found = true;
                        }
                    }

                    if (!found)
                    {
                        if (_bmol->isPossibleStereocenter(atom_idx))
                        {
                            // Check if the stereocenter has already been marked as any
                            // For example [H]C1(O)c2ccnn2[C@@H](O)c2ccnn12 |r,w:1.0,1.1|
                            if (!_bmol->stereocenters.exists(atom_idx))
                                _bmol->addStereocenters(atom_idx, MoleculeStereocenters::ATOM_ANY, 0, false);
                        }
                    }
                }

                if (_scanner.lookNext() == '.')
                {
                    _scanner.skip(1);
                    auto bond_idx = _scanner.readUnsigned();
                    if (!_has_directions_on_rings)
                        _has_directions_on_rings = _bmol->getBondTopology(bond_idx) == TOPOLOGY_RING;
                    if (bond_idx < _bmol->edgeCount() && atom_idx < _bmol->vertexCount())
                    {
                        auto& v = _bmol->getEdge(bond_idx);
                        if (v.end == atom_idx)
                            _bmol->swapEdgeEnds(bond_idx);

                        if (v.beg == atom_idx)
                            _bmol->setBondDirection(bond_idx, wmode == 'U' ? BOND_UP : (wmode == 'D' ? BOND_DOWN : BOND_EITHER));
                    }
                }

                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == 'a') // 'ABS' stereocenters
        {
            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'a'");

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();

                if (_bmol->stereocenters.exists(idx))
                {
                    _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_ABS, 0);
                }
                else
                {
                    _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_ABS, 0, false);
                    _bmol->stereocenters.setTetrahydral(idx, false);
                }
                _overtly_defined_abs.insert(idx);

                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == 'o') // 'OR' stereocenters
        {
            int groupno = _scanner.readUnsigned();

            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'o'");

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();

                if (_bmol->stereocenters.exists(idx))
                    _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_OR, groupno);
                else
                {
                    _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_OR, groupno, false);
                    _bmol->stereocenters.setTetrahydral(idx, false);
                }

                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == '&') // 'AND' stereocenters
        {
            int groupno = _scanner.readUnsigned();

            if (_scanner.readChar() != ':')
                throw Error("colon expected after '&'");

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();
                if (_bmol->stereocenters.exists(idx))
                    _bmol->stereocenters.setType(idx, MoleculeStereocenters::ATOM_AND, groupno);
                else
                {
                    _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_AND, groupno, false);
                    _bmol->stereocenters.setTetrahydral(idx, false);
                }
                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == '^') // radicals
        {
            int rad = _scanner.readIntFix(1);
            int radical;

            if (rad == 1)
                radical = RADICAL_DOUBLET;
            else if (rad == 3)
                radical = RADICAL_SINGLET;
            else if (rad == 4)
                radical = RADICAL_TRIPLET;
            else
                throw Error("unsupported radical number: %d", rad);

            if (_scanner.readChar() != ':')
                throw Error("colon expected after radical number");

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();

                if (_mol != 0)
                    _mol->setAtomRadical(idx, radical);
                else
                    _qmol->resetAtom(idx, QueryMolecule::Atom::und(_qmol->releaseAtom(idx), new QueryMolecule::Atom(QueryMolecule::ATOM_RADICAL, radical)));

                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == '$') // pseudoatoms
        {
            QS_DEF(Array<char>, label);

            for (int i = _bmol->vertexBegin(); i != _bmol->vertexEnd(); i = _bmol->vertexNext(i))
            {
                label.clear();

                while (1)
                {
                    if (_scanner.isEOF())
                        throw Error("end of input while reading $...$ block");
                    c = _scanner.readChar();
                    if (c == ';' || c == '$')
                        break;
                    label.push(c);
                }
                if (c == '$' && i != _bmol->vertexEnd() - 1)
                    throw Error("only %d atoms found in pseudo-atoms $...$ block", i + 1);
                if (c == ';' && i == _bmol->vertexEnd() - 1)
                    throw Error("extra ';' in pseudo-atoms $...$ block");

                if (label.size() > 0)
                {
                    label.push(0);
                    int rnum;

                    if (label.size() > 3 && strncmp(label.ptr(), "_R", 2) == 0 && sscanf(label.ptr() + 2, "%d", &rnum) == 1)
                    {
                        // ChemAxon's Extended SMILES notation for R-sites
                        if (_qmol != 0)
                            _qmol->resetAtom(i, new QueryMolecule::Atom(QueryMolecule::ATOM_RSITE, 0));
                        _bmol->allowRGroupOnRSite(i, rnum);

                        // check multiple R-sites notation
                        BufferScanner strscan(label.ptr());
                        QS_DEF(Array<char>, word);
                        while (!strscan.isEOF())
                        {
                            strscan.skip(1);
                            strscan.readWord(word, ",;");
                            if (word.size() >= 3 && strncmp(word.ptr(), "_R", 2) == 0 && sscanf(word.ptr() + 2, "%d", &rnum) == 1)
                                _bmol->allowRGroupOnRSite(i, rnum);
                        }
                    }
                    else if (label.size() > 4 && strncmp(label.ptr(), "_AP", 3) == 0 && sscanf(label.ptr() + 3, "%d", &rnum) == 1)
                    {
                        // That is ChemAxon's Extended SMILES notation for attachment
                        // points. We mark the atom for removal and place attachment point
                        // markers on its neighbors.
                        int k;
                        const Vertex& v = _bmol->getVertex(i);

                        for (k = v.neiBegin(); k != v.neiEnd(); k = v.neiNext(k))
                            _bmol->addAttachmentPoint(rnum, v.neiVertex(k));
                        to_remove.push(i);
                    }
                    else
                    {
                        // That is ChemAxon's Extended SMILES notation for pseudoatoms and
                        // special atoms A,Q,X,M and AH,QH,XH,MH
                        if (label.size() > 3 &&
                            (strncmp(label.ptr() + label.size() - 3, "_p", 2) == 0 || strncmp(label.ptr() + label.size() - 3, "_e", 2) == 0))
                        {
                            label.pop();
                            label.pop();
                            label.pop();
                            label.push(0);
                        }

                        if (_mol != 0)
                        {
                            const auto atomNumber = _mol->getAtomNumber(i);
                            if (ELEM_MIN < atomNumber && atomNumber < ELEM_MAX)
                            {
                                _mol->setAlias(i, label.ptr());
                            }
                            else
                            {
                                _mol->setPseudoAtom(i, label.ptr());
                            }
                        }
                        else
                        {
                            if (label.size() == 2 && label[0] == 'Q')
                            {
                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(
                                    i, QueryMolecule::Atom::und(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)),
                                                                QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C))));
                            }
                            else if (label.size() == 3 && label[0] == 'Q' && label[1] == 'H')
                            {
                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(i, QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                            }
                            else if (label.size() == 3 && label[0] == 'A' && label[1] == 'H')
                            {
                                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                                x_atom->type = QueryMolecule::OP_NONE;
                                _qmol->resetAtom(i, x_atom.release());
                            }
                            else if (label.size() == 2 && label[0] == 'X')
                            {
                                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                                x_atom->type = QueryMolecule::OP_OR;
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At));

                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(i, x_atom.release());
                            }
                            else if (label.size() == 3 && label[0] == 'X' && label[1] == 'H')
                            {
                                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                                x_atom->type = QueryMolecule::OP_OR;
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At));
                                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H));

                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(i, x_atom.release());
                            }
                            else if (label.size() == 2 && label[0] == 'M')
                            {
                                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                                x_atom->type = QueryMolecule::OP_AND;
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_N)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_O)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_P)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_S)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Se)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_He)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ne)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ar)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Kr)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Xe)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Rn)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)));

                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(i, x_atom.release());
                            }
                            else if (label.size() == 3 && label[0] == 'M' && label[1] == 'H')
                            {
                                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                                x_atom->type = QueryMolecule::OP_AND;
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_N)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_O)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_P)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_S)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Se)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_He)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ne)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ar)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Kr)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Xe)));
                                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Rn)));

                                std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                _qmol->resetAtom(i, x_atom.release());
                            }
                            else
                            {
                                const auto atomNumber = _qmol->getAtomNumber(i);
                                if (ELEM_MIN < atomNumber && atomNumber < ELEM_MAX)
                                {
                                    _qmol->setAlias(i, label.ptr());
                                }
                                else
                                {
                                    std::unique_ptr<QueryMolecule::Atom> atom(_qmol->releaseAtom(i));
                                    atom->removeConstraints(QueryMolecule::ATOM_NUMBER);
                                    _qmol->resetAtom(
                                        i, QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_PSEUDO, label.ptr())));
                                }
                            }
                        }
                    }
                }
            }
        }
        else if (c == 'c' || c == 't') // CIS and TRANS bonds
        {
            if (_scanner.readChar() != ':')
                throw Error("colon expected after '%c' identifier", c);

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();

                bool skip = false;
                if (ignore_cistrans_errors && !MoleculeCisTrans::isGeomStereoBond(*_bmol, _bonds[idx].index, nullptr, false))
                    skip = true;

                if (!skip)
                {
                    _bmol->restoreSubstituents(_bonds[idx].index);
                    const int* subst = _bmol->cis_trans.getSubstituents(_bonds[idx].index);
                    int parity = ((c == 'c') ? MoleculeCisTrans::CIS : MoleculeCisTrans::TRANS);

                    /* CXSmiles doc says:
                       the double bond has the representation a1-a2=a3-a4, where
                       a1 is the smallest atom index of the generated smiles connected to a2
                       a2 is the double bond smaller atom index in the generated smiles
                       a3 is the double bond larger atom index in the generated smiles
                       a4 is the smallest atom index of the generated smiles connected to a3

                     * We need to know if the calculated substituents' indices are not "smallest"
                     * (i.e. they have other substituent with smaller index on the same side).
                     * In that case, we invert the parity.
                     */

                    if (subst[1] != -1 && subst[1] < subst[0])
                        parity = 3 - parity;
                    if (subst[3] != -1 && subst[3] < subst[2])
                        parity = 3 - parity;

                    _bmol->cis_trans.setParity(_bonds[idx].index, parity);
                }
                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == '(') // atom coordinates
        {
            for (int i = _bmol->vertexBegin(); i != _bmol->vertexEnd(); i = _bmol->vertexNext(i))
            {
                float x, y, z = 0;

                x = _scanner.readFloat();
                if (_scanner.readChar() != ',')
                    throw Error("expected comma after X coordinate");

                y = _scanner.readFloat();
                if (_scanner.lookNext() != ';' && _scanner.lookNext() != ')')
                {
                    if (_scanner.readChar() != ',')
                        throw Error("expected comma after Y coordinate");
                    if (_scanner.lookNext() == ';')
                        _scanner.skip(1);
                    else if (_scanner.lookNext() == ')')
                        ;
                    else
                        z = _scanner.readFloat();
                }
                else
                {
                    _scanner.skip(1);
                    if (_scanner.readChar() != ';')
                        throw Error("expected ';' after coordinates");
                }

                _bmol->setAtomXyz(i, x, y, z);
            }
            if (_scanner.readChar() != ')')
                throw Error("expected ')' after coordinates");
            _has_atom_coordinates = true;
        }
        else if (c == 'h') // highlighting (Indigo's own extension)
        {
            c = _scanner.readChar();

            int a = false;

            if (c == 'a')
                a = true;
            else if (c != 'b')
                throw Error("expected 'a' or 'b' after 'h', got '%c'", c);

            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'h%c'", a ? 'a' : 'b');

            while (isdigit(_scanner.lookNext()))
            {
                int idx = _scanner.readUnsigned();

                if (a)
                    _bmol->highlightAtom(idx);
                else
                    _bmol->highlightBond(idx);

                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == 'r')
        {
            if (_scanner.lookNext() == 'b')
            {
                if (_qmol == 0)
                    throw Error("'rb' is allowed only within queries");
                _scanner.skip(1);
                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'rb' identifier");
                while (isdigit(_scanner.lookNext()))
                {
                    // remove 'x' or 'r' configured ATOM_RING_BONDS
                    int atom_idx = _scanner.readUnsigned();
                    QueryMolecule::Atom& atom = _qmol->getAtom(atom_idx);
                    if (atom.hasConstraint(QueryMolecule::ATOM_RING_BONDS))
                        atom.removeConstraints(QueryMolecule::ATOM_RING_BONDS);

                    if (_scanner.readChar() != ':')
                        throw Error("colon expected after 'rb:n'");
                    if (_scanner.lookNext() == '*')
                    {
                        _scanner.skip(1);
                        int rbonds = 0;
                        const Vertex& vertex = _qmol->getVertex(atom_idx);
                        for (int k = vertex.neiBegin(); k != vertex.neiEnd(); k = vertex.neiNext(k))
                            if (_qmol->getEdgeTopology(vertex.neiEdge(k)) == TOPOLOGY_RING)
                                rbonds++;

                        _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx),
                                                                            new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS_AS_DRAWN, rbonds)));
                    }
                    else
                    {
                        int rbcount = _scanner.readUnsigned();
                        if (rbcount)
                        {
                            _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und(
                                                           _qmol->releaseAtom(atom_idx),
                                                           new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, rbcount, (rbcount < 4 ? rbcount : 100))));
                        }
                        else
                            _qmol->resetAtom(
                                atom_idx, QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, 0)));
                    }
                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else
            {
                // All stereocenters are relative instead of abs
                MoleculeStereocenters& s = _bmol->stereocenters;
                for (int i = s.begin(); i != s.end(); i = s.next(i))
                {
                    int atom = s.getAtomIndex(i);
                    if (s.getType(atom) == MoleculeStereocenters::ATOM_ABS && !ignore_no_chiral_flag &&
                        _overtly_defined_abs.find(atom) == _overtly_defined_abs.end())
                        s.setType(atom, MoleculeStereocenters::ATOM_AND, 1);
                }
            }
        }
        else if ((c == 'S') && (_scanner.lookNext() == 'g'))
        {
            // SGroup block found
            _scanner.skip(1);
            int sg_type = -1;
            // Data S-group - 'SgD:atomic_indexes:field_name:data_value:query_op:unit:tag:(coords)'
            // Optional coordinates in parenthesis if necessary, separated by colon characters.
            // The field values with special characters are escaped.
            // If atomic coordinates are exported (with option c ) (-1) is used in the coordinate field for Data S-group attached to the atoms.
            if (_scanner.lookNext() == 'D')
            {
                _scanner.skip(1);
                sg_type = SGroup::SG_TYPE_DAT;
            }
            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'Sg'");

            // If not a data S-group - get group type after colon
            //
            // 'Sg:type:atomic_indexes:subscript:superscript:head_bond_indexes:tail_bond_indexes:bracket
            //
            // atomic_indexes - Atom indexes separated with commas
            // subscript - Subscript of the S-group. If the subscript equals the keyword of the S-group this field can be empty. Escaped field.
            // superscript - Superscript of the S-group. Only connectivity and flip information is allowed. This field can be empty. Escaped field.
            // *_bond_indexes - The indexes of bonds that share a common bracket in case of ladder-type polymers.
            // head_bond_indexes - Head crossing bond indexes. This field can be empty.
            // tail_bond_indexes - Tail crossing bond indexes. This field can be empty.
            // bracket - bracket orientation, bracket type followed by the coordinates (4 pair, separated with commas). Bracket orientation
            //     can be s or d (single or double), bracket type can be b,c,r,s for braces, chevrons, round and square, respectively.
            //     The brackets are written between parentheses and separated with semicolons.
            if (sg_type == -1)
            {
                int sg = _scanner.lookNext();
                constexpr size_t sg_type_max_len = 3;
                char pchar_sg_type[sg_type_max_len];
                std::string sg_type_str;
                if (sg == 'n')
                {
                    sg_type = SGroup::SG_TYPE_SRU;
                    _scanner.skip(1);
                    if (_scanner.readChar() != ':')
                        throw Error("colon expected after 'Sg:n'");
                }
                else if (sg == 'g')
                {
                    _scanner.readCharsFix(sizeof(pchar_sg_type), pchar_sg_type);
                    sg_type_str = std::string(pchar_sg_type, sizeof(pchar_sg_type));
                    if (sg_type_str == "gen")
                    {
                        if (_scanner.readChar() != ':')
                            throw Error("colon expected after 'Sg:%s'", sg_type_str.c_str());
                        sg_type = SGroup::SG_TYPE_GEN;
                    }
                    else
                        throw Error("unexpected 'Sg' %s", sg_type_str.c_str());
                }
                else
                {
                    throw Error("Unsupported Sg type");
                }
            }

            int idx = _bmol->sgroups.addSGroup(sg_type);
            auto& sgroup = _bmol->sgroups.getSGroup(idx);

            // add brackets
            Vec2f* p = sgroup.brackets.push();
            p[0].set(0, 0);
            p[1].set(0, 0);
            p = sgroup.brackets.push();
            p[0].set(0, 0);
            p[1].set(0, 0);

            while (isdigit(_scanner.lookNext()))
            {
                auto atom_idx = _scanner.readUnsigned();
                sgroup.atoms.push(atom_idx);
                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }

            if (_scanner.lookNext() != ':')
                continue;

            _scanner.skip(1); // skip ':'
            const char* word_delimiter = ":,|";

            if (sg_type == SGroup::SG_TYPE_DAT)
            {
                DataSGroup& dsg = static_cast<DataSGroup&>(sgroup);
                // field_name
                _scanner.readWord(dsg.name, word_delimiter);
                if (_scanner.lookNext() != ':') // No more fields
                    continue;
                _scanner.skip(1); // Skip :
                // data_value
                _scanner.readWord(dsg.data, word_delimiter);
                if (_scanner.lookNext() != ':') // No more fields
                    continue;
                _scanner.skip(1); // Skip :
                // query_op
                _scanner.readWord(dsg.queryoper, word_delimiter);
                if (_scanner.lookNext() != ':') // No more fields
                    continue;
                _scanner.skip(1); // Skip :
                // unit
                _scanner.readWord(dsg.description, word_delimiter);
                if (_scanner.lookNext() != ':') // No more fields
                    continue;
                _scanner.skip(1); // Skip :
                // tag
                int next = _scanner.lookNext();
                if (next > 0 && next != ':' && next != ',')
                {
                    dsg.tag = static_cast<char>(next);
                    _scanner.skip(1); // Skip tag
                }
                if (_scanner.lookNext() != ':') // No more fields
                    continue;
                _scanner.skip(1); // Skip :
                // (coords)
                if (_scanner.lookNext() != '(') // No more fields
                    continue;
                long long pos = _scanner.tell();
                constexpr char minus1[] = "(-1)";
                constexpr size_t minus1_len = sizeof(minus1) - 1;
                if (_scanner.length() - pos >= minus1_len)
                {
                    // check for (-1)
                    char buf[minus1_len];
                    _scanner.read(minus1_len, buf);
                    if (strncmp(buf, minus1, sizeof(buf)) == 0)
                        continue;
                    _scanner.seek(pos, SEEK_SET);
                }
                _scanner.skip(1); // Skip (
                dsg.display_pos.x = _scanner.readFloat();
                c = _scanner.readChar();
                if (c != ',')
                    throw Error("Data S-group coord error");
                dsg.display_pos.y = _scanner.readFloat();
                c = _scanner.readChar();
                if (c != ')')
                    throw Error("Data S-group coord error");
            }
            else
            {
                QS_DEF(Array<char>, subscript);
                QS_DEF(Array<char>, conn_arr);
                std::string connectivity, flip;
                subscript.clear();
                conn_arr.clear();
                _scanner.readWord(subscript, word_delimiter);
                if (_scanner.lookNext() == ':')
                {
                    _scanner.skip(1);
                    _scanner.readWord(conn_arr, word_delimiter);
                    if (conn_arr.find('#') >= 0)
                    {
                        // Possible encoded symbols. Try to decode
                        BufferScanner word_scan{conn_arr};
                        while (!word_scan.isEOF())
                            connectivity += readSgChar(word_scan);
                    }
                    else
                    {
                        connectivity = conn_arr.ptr();
                    }
                    // If ',' in field - it is both connectivity and flip
                    std::size_t pos = connectivity.find(',');
                    if (pos != std::string::npos)
                    {
                        flip = connectivity.substr(pos + 1);
                        connectivity = connectivity.substr(0, pos);
                    }
                }

                // Set fields for SRU S-Group
                if (sg_type == SGroup::SG_TYPE_SRU)
                {
                    RepeatingUnit& ru = static_cast<RepeatingUnit&>(sgroup);
                    if (subscript.size())
                        ru.subscript.readString(subscript.ptr(), true);
                    if (connectivity == "ht")
                        ru.connectivity = RepeatingUnit::HEAD_TO_TAIL;
                    else if (connectivity == "hh")
                        ru.connectivity = RepeatingUnit::HEAD_TO_HEAD;
                    else if (connectivity == "eu")
                        ru.connectivity = RepeatingUnit::EITHER;
                }

                if (_scanner.lookNext() != ':')
                    continue;
                _scanner.skip(1); // skip :
                // head_bond_indexes - Head crossing bond indexes. This field can be empty.
                while (isdigit(_scanner.lookNext()))
                {
                    /*auto atom_idx =*/std::ignore = _scanner.readUnsigned();
                    // no support for now
                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
                if (_scanner.lookNext() != ':')
                    continue;
                _scanner.skip(1); // skip :
                // tail_bond_indexes - Tail crossing bond indexes. This field can be empty.
                while (isdigit(_scanner.lookNext()))
                {
                    /*auto atom_idx =*/std::ignore = _scanner.readUnsigned();
                    // no support for now
                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
                if (_scanner.lookNext() != ':')
                    continue;
                _scanner.skip(1); // skip :
                // bracket - bracket orientation, bracket type followed by the coordinates (4 pair, separated with commas).
                if (_scanner.lookNext() != '(')
                    continue;
                _scanner.skip(1); // skip (
                /*char br_orient = */ std::ignore = _scanner.readChar();
                c = _scanner.readChar();
                if (c != ',')
                    throw Error("S-group bracket orientation format error");
                /*char br_type =*/std::ignore = _scanner.readChar();
                c = _scanner.readChar();
                int count = 0;
                constexpr int bracket_coord_count = 8;
                while (c == ',' && count < bracket_coord_count)
                {
                    std::ignore = _scanner.readFloat();
                    c = _scanner.readChar();
                    ++count;
                }
                if (count < bracket_coord_count)
                    throw Error("S-group bracket orientation format error");
                if (c == ',')
                    c = _scanner.readChar();
                if (c != ')')
                    throw Error("S-group bracket orientation format error");
            }
        }
        else if ((c == 'R') && (_scanner.lookNext() == 'G'))
        {
            // RGroup block found
            _scanner.skip(1);

            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'RG'");

            MoleculeRGroups* rgroups = &_bmol->rgroups;
            QS_DEF(Array<char>, label);

            while (1)
            {
                if ((_scanner.lookNext() == '_') || (_scanner.lookNext() == 'L'))
                    label.clear();
                else if (_scanner.lookNext() == '|')
                    break;

                while (1)
                {
                    if (_scanner.isEOF())
                        throw Error("end of input while reading RG block");
                    c = _scanner.readChar();
                    if (c == '=')
                        break;
                    label.push(c);
                }

                if (label.size() > 0)
                {
                    label.push(0);
                    int rnum;

                    if (label.size() > 3 && strncmp(label.ptr(), "_R", 2) == 0 && sscanf(label.ptr() + 2, "%d", &rnum) == 1)
                    {
                        // RGroup description found
                        QS_DEF(Array<char>, rgdesc);
                        RGroup& rgroup = rgroups->getRGroup(rnum);

                        while (1)
                        {
                            if (_scanner.isEOF())
                                throw Error("end of input while reading RG block");

                            if (_scanner.lookNext() == '{')
                            {
                                _scanner.skip(1);
                                _scanner.readWord(rgdesc, "}");
                                _scanner.skip(1);
                            }
                            else if (_scanner.lookNext() == ',')
                            {
                                _scanner.skip(1);
                                continue;
                            }
                            else if ((_scanner.lookNext() == '_') || (_scanner.lookNext() == 'L') || (_scanner.lookNext() == '|'))
                            {
                                break;
                            }
                            else
                            {
                                _scanner.skip(1);
                                continue;
                            }

                            if (rgdesc.size() > 0)
                            {
                                rgdesc.pop();

                                std::unique_ptr<BaseMolecule> fragment(_bmol->neu());
                                BufferScanner rg_scanner(rgdesc);
                                SmilesLoader rg_loader(rg_scanner);

                                if (_bmol->isQueryMolecule())
                                {
                                    rg_loader.loadQueryMolecule(fragment.get()->asQueryMolecule());
                                }
                                else
                                {
                                    rg_loader.loadMolecule(fragment.get()->asMolecule());
                                }

                                rgroup.fragments.add(fragment.release());
                            }
                        }
                    }
                    else if (label.size() > 3 && strncmp(label.ptr(), "LOG", 3) == 0)
                    {
                        // RGroup logic block found
                        while (1)
                        {
                            label.clear();
                            if ((_scanner.lookNext() == '{') || (_scanner.lookNext() == '_'))
                            {
                                if (_scanner.lookNext() == '{')
                                    _scanner.skip(1);

                                while (1)
                                {
                                    if (_scanner.isEOF())
                                        throw Error("end of input while reading LOG block");
                                    c = _scanner.readChar();
                                    if (c == ':')
                                        break;
                                    label.push(c);
                                }
                            }
                            else if (_scanner.lookNext() == '}')
                            {
                                _scanner.skip(1);
                                break;
                            }
                            else
                                break;

                            if (label.size() > 0)
                            {
                                label.push(0);

                                if (label.size() > 3 && strncmp(label.ptr(), "_R", 2) == 0 && sscanf(label.ptr() + 2, "%d", &rnum) == 1)
                                {
                                    RGroup& rgroup = rgroups->getRGroup(rnum);

                                    int if_then = 0;
                                    int rest_h = 0;
                                    QS_DEF(Array<char>, occurrence_str);

                                    if (_scanner.lookNext() == '_')
                                    {
                                        label.clear();
                                        while (1)
                                        {
                                            if (_scanner.isEOF())
                                                throw Error("end of input while reading LOG block");
                                            c = static_cast<char>(_scanner.lookNext());
                                            if (c == ';')
                                                break;
                                            label.push(c);
                                            _scanner.skip(1);
                                        }
                                        label.push(0);

                                        if (label.size() > 3 && strncmp(label.ptr(), "_R", 2) == 0 && sscanf(label.ptr() + 2, "%d", &rnum) == 1)
                                        {
                                            if_then = rnum;
                                        }
                                    }

                                    rgroup.if_then = if_then;

                                    if (_scanner.lookNext() == ';')
                                    {
                                        _scanner.skip(1);
                                        if (_scanner.lookNext() == 'H')
                                        {
                                            rest_h = 1;
                                            _scanner.skip(1);
                                        }
                                    }

                                    rgroup.rest_h = rest_h;

                                    if (_scanner.lookNext() == ';')
                                    {
                                        _scanner.skip(1);
                                        if (_scanner.lookNext() == '.')
                                        {
                                            _scanner.skip(1);
                                            break;
                                        }
                                    }

                                    _scanner.readWord(occurrence_str, ".}");
                                    _readRGroupOccurrenceRanges(occurrence_str.ptr(), rgroup.occurrence);

                                    _scanner.skip(1);
                                }
                            }
                        }
                    }
                }
            }
        }
        else if (c == 'u')
        {
            if (_qmol == 0)
                throw Error("'u' is allowed only within queries");
            if (_scanner.readChar() != ':')
                throw Error("colon expected after 'u' identifier");
            while (isdigit(_scanner.lookNext()))
            {
                int atom_idx = _scanner.readUnsigned();
                _qmol->resetAtom(atom_idx,
                                 QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_UNSATURATION, 0)));
                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
        else if (c == 's')
        {
            if (_qmol == 0)
                throw Error("'s' is allowed only within queries");
            if (_scanner.readChar() != ':')
                throw Error("colon expected after 's' identifier");
            while (isdigit(_scanner.lookNext()))
            {
                int atom_idx = _scanner.readUnsigned();
                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 's:n'");
                int subs = -2;
                if (_scanner.lookNext() == '*')
                {
                    _scanner.skip(1);
                }
                else
                {
                    subs = _scanner.readUnsigned();
                    if (!subs)
                        subs = -1;
                }

                QueryMolecule::Atom& atom = _qmol->getAtom(atom_idx);
                // remove what was set with 'D'
                if (atom.hasConstraint(QueryMolecule::ATOM_SUBSTITUENTS))
                    atom.removeConstraints(QueryMolecule::ATOM_SUBSTITUENTS);

                switch (subs)
                {
                case -1:
                    _qmol->resetAtom(atom_idx,
                                     QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, 0)));
                    break;
                case -2:
                    _qmol->resetAtom(atom_idx,
                                     QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS_AS_DRAWN,
                                                                                                                    _qmol->getVertex(atom_idx).degree())));
                    break;
                default:
                    _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS,
                                                                                                                              subs, (subs < 6 ? subs : 100))));
                    break;
                }
                if (_scanner.lookNext() == ',')
                    _scanner.skip(1);
            }
        }
    }

    if (to_remove.size() > 0)
        _bmol->removeAtoms(to_remove);
}