void SmilesLoader::_readAtom()

in core/indigo-core/molecule/src/smiles_loader.cpp [2802:3504]


// Parses one atom description (the text the comments below call "the
// brackets") primitive by primitive.
//
// The call is first handed to _readAtomLogic(); when that returns false this
// function returns immediately (presumably the expression was already handled
// there -- confirm against _readAtomLogic). Otherwise atom_str is scanned left
// to right, and every recognized primitive either fills a field of `atom`
// (label, isotope, hydrogens, charge, chirality, aromatic, aam, ...) or
// produces a query constraint (`subatom`) that is merged into `qatom` with
// QueryMolecule::Atom::und(), or both.
//
//   atom_str          - text of the atom description to scan
//   first_in_brackets - true while the primitive being read is the first one;
//                       used to tell the element H from the H-count, to
//                       recognize '*' R-sites and the Cn element. It is
//                       cleared after the first primitive, except that a
//                       leading isotope number keeps it set (so that [2H] is
//                       deuterium).
//   atom              - receives the non-query atom properties
//   qatom             - query atom being built; many primitives are accepted
//                       only when it is non-null
//   smarts_mode       - enables SMARTS-only notation ('!', '$(...)', '#num',
//                       '@?') and the aliphatic marking of uppercase symbols
//   inside_rsmiles    - consulted only for '*:' to decide whether it is an
//                       R-site
//
// Throws Error on any character or combination that is not allowed.
void SmilesLoader::_readAtom(Array<char>& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr<QueryMolecule::Atom>& qatom, bool smarts_mode,
                             bool inside_rsmiles)
{
    if (!_readAtomLogic(atom_str, first_in_brackets, atom, qatom, smarts_mode, inside_rsmiles))
        return;

    BufferScanner scanner(atom_str);

    bool element_assigned = false;
    // Set by '!' and consumed (then cleared) by the next primitive.
    bool neg = false;

    // Used by the '#G', '#X' and '#N' extensions: the given atom becomes the
    // whole query atom. A pending '!' is applied and then cleared, so that it
    // does not also negate the primitive that follows.
    // NOTE(review): this overwrites whatever was accumulated in qatom before,
    // instead of AND-ing into it -- confirm that this is intended.
    auto replace_qatom = [&qatom, &neg](std::unique_ptr<QueryMolecule::Atom> whole_atom) {
        if (neg)
        {
            whole_atom.reset(QueryMolecule::Atom::nicht(whole_atom.release()));
            neg = false;
        }
        qatom = std::move(whole_atom);
    };

    while (!scanner.isEOF())
    {
        bool isotope_set = false;
        int element = -1;
        int aromatic = 0;
        int next = scanner.lookNext();
        // Query constraint produced by the primitive read in this iteration.
        std::unique_ptr<QueryMolecule::Atom> subatom;

        if (next == '!')
        {
            if (qatom == nullptr || !smarts_mode)
                throw Error("'!' is allowed only with smarts_mode queries");

            scanner.skip(1);
            neg = !neg;
            first_in_brackets = false;
            continue;
        }
        else if (next == '$')
        {
            scanner.skip(1);
            if (scanner.readChar() != '(')
                throw Error("'$' must be followed by '('");
            if (!smarts_mode)
                throw Error("$( notation can be used only with smarts_mode");

            QS_DEF(Array<char>, subexp);

            subexp.clear();
            int depth = 1;

            // Copy everything up to the parenthesis that closes "$(".
            while (true)
            {
                char c = scanner.readChar();
                if (c == '(')
                    depth++;
                else if (c == ')')
                {
                    depth--;
                    if (depth == 0)
                        break;
                }
                subexp.push(c);
            }

            // The recursive expression is loaded as a separate SMARTS fragment.
            BufferScanner subscanner(subexp);
            std::unique_ptr<SmilesLoader> subloader = std::make_unique<SmilesLoader>(subscanner);
            std::unique_ptr<QueryMolecule> fragment = std::make_unique<QueryMolecule>();

            subloader->loadSMARTS(*fragment);
            fragment->fragment_smarts.copy(subexp);
            fragment->fragment_smarts.push(0);

            // subatom is always empty here: it was created at the top of this iteration.
            subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_FRAGMENT, fragment.release());
        }
        else if (isdigit(next))
        {
            int isotope = scanner.readUnsigned();

            if (qatom != nullptr)
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_ISOTOPE, isotope);
            else
                atom.isotope = isotope;
            isotope_set = true;
        }
        else if (next == 'H')
        {
            scanner.skip(1);

            // Now comes the trouble with the 'H' symbol.
            // As the manual says
            // (see http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html):
            //    [H] means hydrogen atom.
            //    [*H2] means any atom with exactly two hydrogens attached.
            // Yet in the combined expressions like [n;H1] 'H' means the hydrogen
            // count, not the element. To distinguish these things, we use
            // the 'first in brackets' flag, which is true only for the very
            // first sub-expression in the brackets.
            // Also, the following elements begin with H: He, Hs, Hf, Ho, Hg
            if (strchr("esfog", scanner.lookNext()) == nullptr)
            {
                if (first_in_brackets)
                {
                    element = ELEM_H;
                }
                else
                {
                    atom.hydrogens = 1;
                    if (isdigit(scanner.lookNext()))
                        atom.hydrogens = scanner.readUnsigned();
                    if (qatom != nullptr)
                        subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_TOTAL_H, atom.hydrogens);
                }
            }
            else
            {
                element = Element::fromTwoChars('H', scanner.readChar());
            }
        }
        // The 'A' symbol is weird too. It can be the 'aliphatic' atomic primitive,
        // and can also be Al, Ar, As, Ag, Au, At, Ac, or Am.
        else if (next == 'A')
        {
            scanner.skip(1);

            if (strchr("lrsgutcm", scanner.lookNext()) == nullptr)
            {
                if (qatom == nullptr)
                    throw Error("'A' specifier is allowed only for query molecules");

                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_AROMATICITY, ATOM_ALIPHATIC);
            }
            else
            {
                element = Element::fromTwoChars('A', scanner.readChar());
            }
        }
        // Similarly, 'R' can start Rb, Ru, Rh, Re, Rn, Ra, Rf, Rg
        else if (next == 'R')
        {
            scanner.skip(1);

            if (strchr("buhenafg", scanner.lookNext()) == nullptr)
            {
                if (qatom != nullptr)
                {
                    if (isdigit(scanner.lookNext()))
                    {
                        int rc = scanner.readUnsigned();
                        subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SSSR_RINGS, rc);
                    }
                    else
                        subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RING_BONDS, 1, 100);
                }
                else
                {
                    // Check possible Biovia Draw R-sites notation
                    if (isdigit(scanner.lookNext()))
                    {
                        int rc = scanner.readUnsigned();
                        atom.label = ELEM_RSITE;
                        atom.rsite_num = rc;
                    }
                }
            }
            else
            {
                element = Element::fromTwoChars('R', scanner.readChar());
            }
        }
        // Yet 'D' can start Db, Ds, Dy
        else if (next == 'D')
        {
            scanner.skip(1);

            if (strchr("bsy", scanner.lookNext()) == nullptr)
            {
                if (qatom == nullptr)
                    throw Error("'D' specifier is allowed only for query molecules");

                int degree = 1;

                if (isdigit(scanner.lookNext()))
                    degree = scanner.readUnsigned();

                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SUBSTITUENTS, degree);
            }
            else
            {
                element = Element::fromTwoChars('D', scanner.readChar());
            }
        }
        // ... and 'X' can start Xe
        else if (next == 'X')
        {
            scanner.skip(1);

            if (scanner.lookNext() != 'e')
            {
                if (qatom == nullptr)
                    throw Error("'X' specifier is allowed only for query molecules");

                int conn = 1;

                if (isdigit(scanner.lookNext()))
                    conn = scanner.readUnsigned();

                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_CONNECTIVITY, conn);
            }
            else
            {
                element = Element::fromTwoChars('X', scanner.readChar());
            }
        }
        else if (next == '*')
        {
            atom.star_atom = true;
            scanner.skip(1);
            if (first_in_brackets && atom_str.size() < 2 && !smarts_mode)
            {
                // A lone '*' outside of SMARTS mode
                atom.label = ELEM_RSITE;
            }
            else if (first_in_brackets && scanner.lookNext() == ':' && !inside_rsmiles)
            {
                // '*:' (star followed by an AAM number) outside of a reaction SMILES
                atom.label = ELEM_RSITE;
            }
            else
            {
                if (qatom == nullptr)
                    atom.label = ELEM_PSEUDO;
                else
                {
                    // In a query the star is stored as "not hydrogen"
                    subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_NUMBER, ELEM_H);
                    subatom.reset(QueryMolecule::Atom::nicht(subatom.release()));
                }
            }
        }
        else if (next == '#')
        {
            scanner.skip(1);
            if (scanner.lookNext() == 'G')
            {
                // '#G<n>': any element of main group n of the periodic table.
                // One row per group, in the order the alternatives are added;
                // every row is terminated by -1.
                static const int main_group_elements[][8] = {
                    {ELEM_H, ELEM_Li, ELEM_Na, ELEM_K, ELEM_Rb, ELEM_Cs, ELEM_Fr, -1},
                    {ELEM_Be, ELEM_Mg, ELEM_Ca, ELEM_Sr, ELEM_Ba, ELEM_Ra, -1},
                    {ELEM_B, ELEM_Al, ELEM_Ga, ELEM_In, ELEM_Tl, ELEM_Nh, -1},
                    {ELEM_C, ELEM_Si, ELEM_Ge, ELEM_Sn, ELEM_Pb, ELEM_Fl, -1},
                    {ELEM_N, ELEM_P, ELEM_As, ELEM_Sb, ELEM_Bi, ELEM_Mc, -1},
                    {ELEM_O, ELEM_S, ELEM_Se, ELEM_Te, ELEM_Po, ELEM_Lv, -1},
                    {ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At, ELEM_Ts, -1},
                    {ELEM_He, ELEM_Ne, ELEM_Ar, ELEM_Kr, ELEM_Xe, ELEM_Rn, ELEM_Og, -1},
                };
                constexpr int main_group_count = static_cast<int>(sizeof(main_group_elements) / sizeof(main_group_elements[0]));

                scanner.skip(1);
                const int group = scanner.readUnsigned();
                if (group < 1 || group > main_group_count)
                    throw Error("Unknown group %d", group);

                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();
                x_atom->type = QueryMolecule::OP_OR;
                for (const int* member = main_group_elements[group - 1]; *member != -1; ++member)
                    x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, *member));

                replace_qatom(std::move(x_atom));
            }
            else if (scanner.lookNext() == 'X')
            {
                // '#X': neither hydrogen nor carbon
                scanner.skip(1);
                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();
                x_atom->type = QueryMolecule::OP_AND;
                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)));
                x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));

                replace_qatom(std::move(x_atom));
            }
            else if (scanner.lookNext() == 'N')
            {
                // '#N': one of O, N, F, Cl, Br
                scanner.skip(1);
                std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();
                x_atom->type = QueryMolecule::OP_OR;
                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_O));
                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_N));
                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F));
                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl));
                x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br));

                replace_qatom(std::move(x_atom));
            }
            else
            {
                // '#<num>': element given by its number
                element = scanner.readUnsigned();
                if (qatom == nullptr)
                    throw Error("'#%d' atom representation allowed only for query molecules", element);
                if (!smarts_mode)
                    throw Error("#num notation can be used only with smarts_mode");
            }
        }
        // Now we check that we have here an element from the periodic table.
        // We assume that this must be an alphabetic character and also
        // something not from the alphabetic SMARTS 'atomic primitives'
        // (see http://www.daylight.com/dayhtml/doc/theory/theory.smarts.html).
        else if (isalpha(next) && strchr("hrvxastiqw", next) == nullptr)
        {
            scanner.skip(1);

            if (next == 'c')
            {
                element = ELEM_C;
                aromatic = ATOM_AROMATIC;
            }
            else if (next == 'n')
            {
                element = ELEM_N;
                aromatic = ATOM_AROMATIC;
            }
            else if (next == 'o')
            {
                element = ELEM_O;
                aromatic = ATOM_AROMATIC;
            }
            else if (next == 'p')
            {
                element = ELEM_P;
                aromatic = ATOM_AROMATIC;
            }
            else if (islower(next))
                throw Error("unrecognized lowercase symbol: %c", next);

            // Now we are sure that 'next' is a capital letter

            // Check if we have a lowercase letter right after...
            else if (isalpha(scanner.lookNext()) && islower(scanner.lookNext()) &&
                     // If a lowercase letter is following the uppercase letter,
                     // we should consider reading them as a single element.
                     // They can possibly not form an element: for example,
                     // [Nr] is formally a nitrogen in a ring (although nobody would
                     // write it that way: [N;r] is much more clear).
                     (Element::fromTwoChars2(static_cast<char>(next), scanner.lookNext())) > 0 &&
                     (Element::fromTwoChars2(static_cast<char>(next), scanner.lookNext()) != ELEM_Cn))
            {
                element = Element::fromTwoChars2(static_cast<char>(next), scanner.lookNext());
                scanner.skip(1);
                if (smarts_mode)
                    if (element == ELEM_As || element == ELEM_Se || element == ELEM_Si || element == ELEM_Te)
                        aromatic = ATOM_ALIPHATIC;
            }
            // "Cn" is read as the element only at the very beginning;
            // elsewhere it falls through to 'C' followed by the 'n' primitive.
            else if ((next == 'C' && scanner.lookNext() == 'n') && first_in_brackets)
            {
                scanner.skip(1);
                element = ELEM_Cn;
            }
            else
            {
                // It is a single-char uppercase element identifier then
                element = Element::fromChar(static_cast<char>(next));

                if (smarts_mode)
                    if (element == ELEM_C || element == ELEM_N || element == ELEM_O || element == ELEM_P || element == ELEM_S)
                        aromatic = ATOM_ALIPHATIC;
            }
        }
        else if (next == '@')
        {
            int chirality_type = QueryMolecule::CHIRALITY_GENERAL;
            int chirality_value = QueryMolecule::CHIRALITY_ANTICLOCKWISE;
            if (!smarts_mode)
                atom.chirality = chirality_value;
            scanner.skip(1);
            if (scanner.lookNext() == '@')
            {
                chirality_value = QueryMolecule::CHIRALITY_CLOCKWISE;
                if (!smarts_mode)
                    atom.chirality = chirality_value;
                scanner.skip(1);
            }
            else
            {
                // Compiled once: building a std::regex on every call is expensive.
                // All three patterns are anchored with '^' because the scanner is
                // advanced by a fixed number of characters from the current
                // position after a match.
                static const std::regex th_al_re("^(TH|AL)([1-2])");
                static const std::regex sp_re("^SP([1-3])");
                static const std::regex tb_oh_re(R"(^(TB([1-9]|1[0-9]|20)|OH([1-9]|1\d|2\d|30))(?!\d))");

                std::string current(static_cast<const char*>(scanner.curptr()), static_cast<size_t>(scanner.length() - scanner.tell()));
                std::smatch match;
                if (std::regex_search(current, match, th_al_re))
                {
                    int value = std::stoi(match[2]);
                    if (!smarts_mode)
                        atom.chirality = value;
                    scanner.skip(3);
                    if (match[1] == "TH")
                        chirality_type = QueryMolecule::CHIRALITY_TETRAHEDRAL;
                    else if (match[1] == "AL")
                        chirality_type = QueryMolecule::CHIRALITY_ALLENE_LIKE;
                    chirality_value = value;
                }
                else if (std::regex_search(current, match, sp_re))
                {
                    // this type of chirality not supported. just skip it.
                    scanner.skip(3);
                    chirality_type = QueryMolecule::CHIRALITY_SQUARE_PLANAR;
                    chirality_value = std::stoi(match[1]);
                }
                else if (std::regex_search(current, match, tb_oh_re))
                {
                    constexpr int TB_GROUP = 2;
                    constexpr int OH_GROUP = 3;
                    int value = std::stoi(match.str(TB_GROUP).empty() ? match.str(OH_GROUP) : match.str(TB_GROUP));
                    if (value <= 2 && !smarts_mode)
                        atom.chirality = value;
                    // Two letters plus one digit, and one more digit for values >= 10
                    scanner.skip(3);
                    if (value >= 10)
                        scanner.skip(1);
                    chirality_type = match.str(TB_GROUP).empty() ? QueryMolecule::CHIRALITY_OCTAHEDRAL : QueryMolecule::CHIRALITY_TRIGONAL_BIPYRAMIDAL;
                    chirality_value = value;
                }
            }
            if (scanner.lookNext() == '?')
            {
                if (!smarts_mode)
                    throw Error("@? can be used only with smarts_mode");
                chirality_value |= QueryMolecule::CHIRALITY_OR_UNSPECIFIED;
                scanner.skip(1);
            }
            if (smarts_mode)
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_CHIRALITY, chirality_type, chirality_value);
        }
        else if (next == '+' || next == '-')
        {
            char c = scanner.readChar();
            if (c == '+')
                atom.charge = 1;
            else
                atom.charge = -1;

            // Either an explicit magnitude ("+2") or a run of equal signs ("++")
            if (isdigit(scanner.lookNext()))
                atom.charge *= scanner.readUnsigned();
            else
                while (scanner.lookNext() == c)
                {
                    scanner.skip(1);
                    if (c == '+')
                        atom.charge++;
                    else
                        atom.charge--;
                }

            if (qatom != nullptr)
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_CHARGE, atom.charge);
        }
        else if (next == 'a') // can be [as] or SMARTS aromaticity flag
        {
            scanner.skip(1);

            if (scanner.lookNext() == 's')
            {
                scanner.skip(1);

                element = ELEM_As;
                aromatic = ATOM_AROMATIC;
            }
            else
            {
                if (qatom == nullptr)
                    throw Error("'a' specifier is allowed only for query molecules");

                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_AROMATICITY, ATOM_AROMATIC);
            }
        }
        else if (next == 's') // can be [s], [se] or [si]
        {
            scanner.skip(1);
            if (scanner.lookNext() == 'e')
            {
                scanner.skip(1);
                element = ELEM_Se;
                aromatic = ATOM_AROMATIC;
            }
            else if (scanner.lookNext() == 'i')
            {
                // Aromatic Si cannot occur in SMILES by specification, but
                // Cactvs produces it
                scanner.skip(1);
                element = ELEM_Si;
                aromatic = ATOM_AROMATIC;
            }
            else
            {
                element = ELEM_S;
                aromatic = ATOM_AROMATIC;
            }
        }
        else if (next == 't') // [te]
        {
            // Aromatic Te cannot occur in SMILES by specification, but
            // RDKit produces it within extended SMILES
            scanner.skip(1);
            if (scanner.lookNext() == 'e')
            {
                scanner.skip(1);
                element = ELEM_Te;
                aromatic = ATOM_AROMATIC;
            }
            else
                throw Error("invalid character within atom description: '%c'", next);
        }
        else if (next == 'h')
        {
            scanner.skip(1);

            if (qatom == nullptr)
                throw Error("'h' specifier is allowed only for query molecules");

            if (isdigit(scanner.lookNext()))
            {
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_IMPLICIT_H, scanner.readUnsigned());
            }
            else
            {
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_IMPLICIT_H, 1, 100);
            }
        }
        else if (next == 'r')
        {
            scanner.skip(1);
            if (qatom == nullptr)
                throw Error("'r' specifier is allowed only for query molecules");

            if (isdigit(scanner.lookNext()))
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SMALLEST_RING_SIZE, scanner.readUnsigned());
            else if (smarts_mode)
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SMALLEST_RING_SIZE, 1, 100);
            else
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RING_BONDS, 1, 100);
        }
        else if (next == 'v')
        {
            scanner.skip(1);
            if (qatom == nullptr)
                throw Error("'v' specifier is allowed only for query molecules");

            int val = 1;

            if (isdigit(scanner.lookNext()))
                val = scanner.readUnsigned();

            subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_TOTAL_BOND_ORDER, val);
        }
        else if (next == 'w')
        {
            scanner.skip(1);
            if (qatom == nullptr)
                throw Error("'w' specifier is allowed only for query molecules");

            int val = 1;

            if (isdigit(scanner.lookNext()))
                val = scanner.readUnsigned();

            subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_SUBSTITUENTS, val);
        }
        else if (next == 'x' || next == 'q')
        {
            scanner.skip(1);
            if (qatom == nullptr)
                throw Error("'%c' specifier is allowed only for query molecules", next);

            if (isdigit(scanner.lookNext()))
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RING_BONDS, scanner.readUnsigned());
            else
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RING_BONDS, 1, 100);
        }
        else if (next == ':')
        {
            // Atom-to-atom mapping number, optionally marked ignorable with '?'
            scanner.skip(1);
            if (scanner.lookNext() == '?')
            {
                if (qatom == nullptr)
                    throw Error("ignorable AAM numbers are allowed only for queries");
                atom.ignorable_aam = true;
                scanner.skip(1);
            }
            atom.aam = scanner.readUnsigned();
        }
        else if (next == 'i')
        {
            // NOTE(review): unlike the other query-only primitives there is no
            // "qatom is set" check here -- confirm whether that is intended.
            scanner.skip(1);
            subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_PI_BONDED, 1);
        }
        else
            throw Error("invalid character within atom description: '%c'", next);

        if (element > 0)
        {
            if (qatom != nullptr)
                subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_NUMBER, element);
            else
            {
                if (element_assigned)
                    throw Error("two element labels for one atom");
                atom.label = element;
            }
            element_assigned = true;
        }

        if (aromatic != 0)
        {
            if (aromatic == ATOM_AROMATIC)
                atom.aromatic = true;

            if (qatom != nullptr)
            {
                if (subatom == nullptr)
                    subatom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_AROMATICITY, aromatic);
                else
                    subatom.reset(QueryMolecule::Atom::und(subatom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_AROMATICITY, aromatic)));
            }
        }

        if (subatom != nullptr)
        {
            // Apply a pending '!' to this primitive only
            if (neg)
            {
                subatom.reset(QueryMolecule::Atom::nicht(subatom.release()));
                neg = false;
            }
            if (qatom != nullptr)
            {
                qatom.reset(QueryMolecule::Atom::und(qatom.release(), subatom.release()));
            }
            else
            {
                qatom = std::move(subatom);
            }
        }

        // we check for isotope_set here to treat [2H] as deuterium atom,
        // not like something with isotope number 2 and h-count 1
        if (!isotope_set)
            first_in_brackets = false;
    }
}