void CmlLoader::_loadMoleculeElement()

in core/indigo-core/molecule/src/cml_loader.cpp [156:1214]


void CmlLoader::_loadMoleculeElement(XMLHandle& handle)
{
    std::unordered_map<std::string, int> atoms_id;
    std::unordered_map<std::string, size_t> atoms_id_int;

    // Function that mappes atom id as a string to an atom index
    auto getAtomIdx = [&](const char* id) {
        auto it = atoms_id.find(id);
        if (it == atoms_id.end())
            throw Error("atom id %s cannot be found", id);
        return it->second;
    };

    QS_DEF(Array<int>, total_h_count);
    QS_DEF(Array<int>, query_h_count);
    atoms_id.clear();
    atoms_id_int.clear();
    total_h_count.clear();
    query_h_count.clear();

    const char* title = handle.ToElement()->Attribute("title");

    if (title != 0)
        _bmol->name.readString(title, true);

    QS_DEF(std::vector<Atom>, atoms);
    atoms.clear();

    //
    // Read elements into an atoms array first and the parse them
    //

    XMLHandle atom_array = handle.FirstChildElement("atomArray");
    // Read atoms as xml attributes
    // <atomArray
    //       atomID="a1 a2 a3 ... "
    //       elementType="O C O ..."
    //       hydrogenCount="1 0 0 ..."
    // />
    splitStringIntoProperties(atom_array.ToElement()->Attribute("atomID"), atoms, &Atom::id);

    // Fill id if any were found
    size_t atom_index;
    for (atom_index = 0; atom_index < atoms.size(); atom_index++)
        atoms_id_int.emplace(atoms[atom_index].id, atom_index);

    splitStringIntoProperties(atom_array.ToElement()->Attribute("elementType"), atoms, &Atom::element_type);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("mrvPseudo"), atoms, &Atom::label);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("hydrogenCount"), atoms, &Atom::hydrogen_count);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("isotope"), atoms, &Atom::isotope);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("isotopeNumber"), atoms, &Atom::isotope);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("formalCharge"), atoms, &Atom::formal_charge);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("spinMultiplicity"), atoms, &Atom::spin_multiplicity);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("radical"), atoms, &Atom::radical);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("mrvValence"), atoms, &Atom::valence);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("rgroupRef"), atoms, &Atom::rgroupref);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("attachmentPoint"), atoms, &Atom::attpoint);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("attachmentOrder"), atoms, &Atom::attorder);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("mrvQueryProps"), atoms, &Atom::query_props);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("mrvAlias"), atoms, &Atom::alias);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("mrvMap"), atoms, &Atom::atom_mapping);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("reactionStereo"), atoms, &Atom::atom_inversion);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("exactChage"), atoms, &Atom::atom_exact_change);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("x2"), atoms, &Atom::x);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("y2"), atoms, &Atom::y);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("x3"), atoms, &Atom::x);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("y3"), atoms, &Atom::y);
    splitStringIntoProperties(atom_array.ToElement()->Attribute("z3"), atoms, &Atom::z);

    // Read atoms as nested xml elements
    //   <atomArray>
    //     <atom id="a1" elementType="H" />
    for (XMLElement* elem = atom_array.FirstChild().ToElement(); elem; elem = elem->NextSiblingElement())
    {
        if (strncmp(elem->Value(), "atom", 4) != 0)
            continue;

        const char* id = elem->Attribute("id");
        if (id == 0)
            throw Error("atom without an id");

        // Check if this atom has already been parsed
        auto it = atoms_id_int.find(id);
        if (it != end(atoms_id_int))
            atom_index = it->second;
        else
        {
            atom_index = atoms.size();
            atoms.emplace_back();
            atoms_id_int.emplace(id, atom_index);
        }

        Atom& a = atoms[atom_index];

        a.id = id;

        const char* element_type = elem->Attribute("elementType");

        if (element_type == 0)
            throw Error("atom without an elementType");
        a.element_type = element_type;

        const char* pseudo = elem->Attribute("mrvPseudo");

        if (pseudo != 0)
            a.label = pseudo;

        const char* alias = elem->Attribute("mrvAlias");

        if (alias != 0)
            a.alias = alias;

        const char* isotope = elem->Attribute("isotope");

        if (isotope == 0)
            isotope = elem->Attribute("isotopeNumber");

        if (isotope != 0)
            a.isotope = isotope;

        const char* charge = elem->Attribute("formalCharge");

        if (charge != 0)
            a.formal_charge = charge;

        const char* spinmultiplicity = elem->Attribute("spinMultiplicity");

        if (spinmultiplicity != 0)
            a.spin_multiplicity = spinmultiplicity;

        const char* radical = elem->Attribute("radical");

        if (radical != 0)
            a.radical = radical;

        const char* valence = elem->Attribute("mrvValence");

        if (valence != 0)
            a.valence = valence;

        const char* hcount = elem->Attribute("hydrogenCount");

        if (hcount != 0)
            a.hydrogen_count = hcount;

        const char* atom_map = elem->Attribute("mrvMap");

        if (atom_map != 0)
            a.atom_mapping = atom_map;

        const char* atom_inv = elem->Attribute("reactionStereo");

        if (atom_inv != 0)
            a.atom_inversion = atom_inv;

        const char* atom_exact = elem->Attribute("exactChange");

        if (atom_exact != 0)
            a.atom_exact_change = atom_exact;

        /*
         * Read coordinates
         */
        const char* x2 = elem->Attribute("x2");
        const char* y2 = elem->Attribute("y2");

        if (x2 != 0 && y2 != 0)
        {
            a.x = x2;
            a.y = y2;
        }
        else
        {
            const char* x3 = elem->Attribute("x3");
            const char* y3 = elem->Attribute("y3");
            const char* z3 = elem->Attribute("z3");

            if (x3 != 0 && y3 != 0 && z3 != 0)
            {
                a.x = x3;
                a.y = y3;
                a.z = z3;
            }
        }

        const char* rgroupref = elem->Attribute("rgroupRef");

        if (rgroupref != 0)
            a.rgroupref = rgroupref;

        const char* attpoint = elem->Attribute("attachmentPoint");

        if (attpoint != 0)
            a.attpoint = attpoint;

        const char* attorder = elem->Attribute("attachmentOrder");

        if (attorder != 0)
            a.attorder = attorder;

        const char* query_props = elem->Attribute("mrvQueryProps");

        if (query_props != 0)
        {
            if (_qmol != 0)
                a.query_props = query_props;
            else
                throw Error("'query features' are allowed only for queries");
        }
    }

    // Parse them
    for (auto& a : atoms)
    {
        int label = Element::fromString2(a.element_type.c_str());

        if ((label == -1) && (strncmp(a.element_type.c_str(), "R", 1) == 0))
            label = ELEM_RSITE;
        else if ((label == -1) && (strncmp(a.element_type.c_str(), "*", 1) == 0))
            label = ELEM_ATTPOINT;
        else if (!a.label.empty())
        {
            if (label == -1)
                label = ELEM_PSEUDO;
            else if (label == ELEM_C)
            {
                if ((strncmp(a.label.c_str(), "AH", 2) == 0) || (strncmp(a.label.c_str(), "QH", 2) == 0) || (strncmp(a.label.c_str(), "XH", 2) == 0) ||
                    (strncmp(a.label.c_str(), "MH", 2) == 0) || (strncmp(a.label.c_str(), "X", 1) == 0) || (strncmp(a.label.c_str(), "M", 1) == 0))
                {
                }
                else
                    label = ELEM_PSEUDO;
            }
        }
        else if (label == -1)
            label = ELEM_PSEUDO;

        int idx;
        if (label == ELEM_ATTPOINT)
        {
            atoms_id.emplace(a.id, -1);
        }
        else
        {
            if (_mol != 0)
            {
                idx = _mol->addAtom(label);

                if (label == ELEM_PSEUDO)
                {
                    if (!a.label.empty())
                        _mol->setPseudoAtom(idx, a.label.c_str());
                    else
                        _mol->setPseudoAtom(idx, a.element_type.c_str());
                }

                total_h_count.expandFill(idx + 1, -1);
                query_h_count.expandFill(idx + 1, -1);

                atoms_id.emplace(a.id, idx);

                if (!a.isotope.empty())
                {
                    int val;
                    if (sscanf(a.isotope.c_str(), "%d", &val) != 1)
                        throw Error("error parsing isotope");
                    _mol->setAtomIsotope(idx, val);
                }

                if (!a.formal_charge.empty())
                {
                    int val;
                    if (sscanf(a.formal_charge.c_str(), "%d", &val) != 1)
                        throw Error("error parsing charge");
                    _mol->setAtomCharge(idx, val);
                }

                if (!a.spin_multiplicity.empty())
                {
                    int val;
                    if (sscanf(a.spin_multiplicity.c_str(), "%d", &val) != 1)
                        throw Error("error parsing spin multiplicity");
                    _mol->setAtomRadical(idx, val);
                }

                if (!a.radical.empty())
                {
                    int val = 0;
                    if (strncmp(a.radical.c_str(), "divalent1", 9) == 0)
                        val = 1;
                    else if (strncmp(a.radical.c_str(), "monovalent", 10) == 0)
                        val = 2;
                    else if ((strncmp(a.radical.c_str(), "divalent3", 9) == 0) || (strncmp(a.radical.c_str(), "divalent", 8) == 0) ||
                             (strncmp(a.radical.c_str(), "triplet", 7) == 0))
                        val = 3;
                    _mol->setAtomRadical(idx, val);
                }

                if (a.spin_multiplicity.empty() && a.radical.empty())
                {
                    _mol->setAtomRadical(idx, 0);
                }

                if (!a.valence.empty())
                {
                    int val;
                    if (sscanf(a.valence.c_str(), "%d", &val) == 1)
                        _mol->setExplicitValence(idx, val);
                }

                if (!a.hydrogen_count.empty())
                {
                    int val;
                    if (sscanf(a.hydrogen_count.c_str(), "%d", &val) != 1)
                        throw Error("error parsing hydrogen count");
                    if (val < 0)
                        throw Error("negative hydrogen count");
                    total_h_count[idx] = val;
                }
            }
            else
            {
                std::unique_ptr<QueryMolecule::Atom> atom;
                int qhcount = -1;

                if (label == ELEM_PSEUDO)
                {
                    if (!a.label.empty())
                        atom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_PSEUDO, a.label.c_str());
                    else
                        atom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_PSEUDO, a.element_type.c_str());
                }
                else if (label == ELEM_RSITE)
                    atom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_RSITE, 0);
                else
                    atom = std::make_unique<QueryMolecule::Atom>(QueryMolecule::ATOM_NUMBER, label);

                if (!a.query_props.empty() && (strncmp(a.query_props.c_str(), "L", 1) == 0)) // _ATOM_LIST
                {
                    std::unique_ptr<QueryMolecule::Atom> atomlist;

                    BufferScanner strscan(a.query_props.c_str());
                    QS_DEF(Array<char>, el);
                    QS_DEF(Array<char>, delim);
                    el.clear();
                    delim.clear();

                    strscan.skip(1);
                    delim.push(strscan.readChar());
                    delim.push(':');
                    delim.push(0);

                    while (!strscan.isEOF())
                    {
                        strscan.readWord(el, delim.ptr());
                        _appendQueryAtom(el.ptr(), atomlist);
                        if (strscan.readChar() == ':')
                            break;
                    }

                    if (delim[0] == '!')
                        atomlist.reset(QueryMolecule::Atom::nicht(atomlist.release()));

                    atom.reset(atomlist.release());
                }
                else if (!a.query_props.empty() && (strncmp(a.query_props.c_str(), "A", 1) == 0)) // _ATOM_A
                {
                    atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                    atom.reset(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)));
                }
                else if (!a.query_props.empty() && (strncmp(a.query_props.c_str(), "Q", 1) == 0)) // _ATOM_Q
                {
                    atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                    atom.reset(QueryMolecule::Atom::und(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)),
                                                        QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C))));
                }

                if (label == ELEM_C && !a.label.empty())
                {
                    if (strncmp(a.label.c_str(), "AH", 2) == 0)
                    {
                        std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();
                        x_atom->type = QueryMolecule::OP_NONE;
                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(x_atom.release());
                    }
                    else if (strncmp(a.label.c_str(), "QH", 2) == 0)
                    {
                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                    }
                    else if (strncmp(a.label.c_str(), "XH", 2) == 0)
                    {
                        std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                        x_atom->type = QueryMolecule::OP_OR;
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H));

                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(x_atom.release());
                    }
                    else if (strncmp(a.label.c_str(), "X", 1) == 0)
                    {
                        std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                        x_atom->type = QueryMolecule::OP_OR;
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I));
                        x_atom->children.add(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At));

                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(x_atom.release());
                    }
                    else if (strncmp(a.label.c_str(), "MH", 2) == 0)
                    {
                        std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                        x_atom->type = QueryMolecule::OP_AND;
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_N)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_O)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_P)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_S)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Se)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_He)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ne)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ar)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Kr)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Xe)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Rn)));

                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(x_atom.release());
                    }
                    else if (strncmp(a.label.c_str(), "M", 1) == 0)
                    {
                        std::unique_ptr<QueryMolecule::Atom> x_atom = std::make_unique<QueryMolecule::Atom>();

                        x_atom->type = QueryMolecule::OP_AND;
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_C)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_N)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_O)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_F)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_P)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_S)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Cl)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Se)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Br)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_I)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_At)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_He)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ne)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Ar)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Kr)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Xe)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_Rn)));
                        x_atom->children.add(QueryMolecule::Atom::nicht(new QueryMolecule::Atom(QueryMolecule::ATOM_NUMBER, ELEM_H)));

                        atom.get()->removeConstraints(QueryMolecule::ATOM_NUMBER);
                        atom.reset(x_atom.release());
                    }
                }
                else if (!a.query_props.empty()) // Query features
                {
                    BufferScanner strscan(a.query_props.c_str());
                    QS_DEF(Array<char>, qf);
                    QS_DEF(Array<char>, delim);
                    qf.clear();
                    delim.clear();

                    delim.push(';');
                    delim.push(0);

                    while (!strscan.isEOF())
                    {
                        strscan.readWord(qf, delim.ptr());
                        if (strncmp(qf.ptr(), "rb", 2) == 0)
                        {
                            BufferScanner qfscan(qf.ptr());
                            qfscan.skip(2);
                            int rbcount;
                            if (qfscan.lookNext() == '*')
                                rbcount = -2;
                            else
                                rbcount = qfscan.readInt1();

                            if (rbcount > 0)
                                atom.reset(QueryMolecule::Atom::und(
                                    atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, rbcount, (rbcount < 4 ? rbcount : 100))));
                            else if (rbcount == 0)
                                atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, 0)));
                            else if (rbcount == -2)
                                atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS_AS_DRAWN, 0)));
                        }
                        else if (strncmp(qf.ptr(), "s", 1) == 0)
                        {
                            BufferScanner qfscan(qf.ptr());
                            qfscan.skip(1);
                            int subst;
                            if (qfscan.lookNext() == '*')
                            {
                                subst = -2;
                            }
                            else
                            {
                                subst = qfscan.readInt1();
                            }

                            if (subst == 0)
                            {
                                atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, 0)));
                            }
                            else if (subst == -2)
                            {
                                atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS_AS_DRAWN, 0)));
                            }
                            else if (subst > 0)
                            {
                                atom.reset(QueryMolecule::Atom::und(
                                    atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subst, (subst < 6 ? subst : 100))));
                            }
                        }
                        else if (strncmp(qf.ptr(), "u", 1) == 0)
                        {
                            BufferScanner qfscan(qf.ptr());
                            qfscan.skip(1);
                            bool unsat = (qfscan.readInt1() > 0);
                            if (unsat)
                                atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_UNSATURATION, 0)));
                        }
                        else if (strncmp(qf.ptr(), "H", 1) == 0)
                        {
                            BufferScanner qfscan(qf.ptr());
                            qfscan.skip(1);
                            qhcount = qfscan.readInt1();
                            /*
                                                 if (total_h > 0)
                                                    atom.reset(QueryMolecule::Atom::und(atom.release(),
                                                             new QueryMolecule::Atom(QueryMolecule::ATOM_TOTAL_H, total_h)));
                            */
                        }

                        if (!strscan.isEOF())
                            strscan.skip(1);
                    }
                }

                if (!a.formal_charge.empty())
                {
                    int val;
                    if (sscanf(a.formal_charge.c_str(), "%d", &val) != 1)
                        throw Error("error parsing charge");
                    atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_CHARGE, val)));
                }

                if (!a.isotope.empty())
                {
                    int val;
                    if (sscanf(a.isotope.c_str(), "%d", &val) != 1)
                        throw Error("error parsing isotope");
                    atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_ISOTOPE, val)));
                }

                if (!a.spin_multiplicity.empty())
                {
                    int val;
                    if (sscanf(a.spin_multiplicity.c_str(), "%d", &val) != 1)
                        throw Error("error parsing spin multiplicity");
                    atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_RADICAL, val)));
                }

                if (!a.radical.empty())
                {
                    int val = 0;
                    if (strncmp(a.radical.c_str(), "divalent1", 9) == 0)
                        val = 1;
                    else if (strncmp(a.radical.c_str(), "monovalent", 10) == 0)
                        val = 2;
                    else if ((strncmp(a.radical.c_str(), "divalent3", 9) == 0) || (strncmp(a.radical.c_str(), "divalent", 8) == 0) ||
                             (strncmp(a.radical.c_str(), "triplet", 7) == 0))
                        val = 3;
                    atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_RADICAL, val)));
                }

                if (!a.valence.empty())
                {
                    int val;
                    if (sscanf(a.valence.c_str(), "%d", &val) == 1)
                        atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_VALENCE, val)));
                }

                idx = _qmol->addAtom(atom.release());
                atoms_id.emplace(a.id, idx);
                total_h_count.expandFill(idx + 1, -1);
                query_h_count.expandFill(idx + 1, -1);
                query_h_count[idx] = qhcount;
            }

            if (!a.rgroupref.empty())
            {
                int val;
                if (sscanf(a.rgroupref.c_str(), "%d", &val) != 1)
                    throw Error("error parsing R-group reference");
                _bmol->allowRGroupOnRSite(idx, val);
            }

            if (!a.attpoint.empty())
            {
                int val;
                if (strncmp(a.attpoint.c_str(), "both", 4) == 0)
                    val = 3;
                else if (sscanf(a.attpoint.c_str(), "%d", &val) != 1)
                    throw Error("error parsing Attachment point");
                for (int att_idx = 0; (1 << att_idx) <= val; att_idx++)
                    if (val & (1 << att_idx))
                        _bmol->addAttachmentPoint(att_idx + 1, idx);
            }
            Vec3f a_pos;

            if (!a.x.empty())
            {
                a_pos.x = readFloat(a.x.c_str());
            }
            if (!a.y.empty())
            {
                a_pos.y = readFloat(a.y.c_str());
            }
            if (!a.z.empty())
            {
                a_pos.z = readFloat(a.z.c_str());
            }

            _bmol->setAtomXyz(idx, a_pos);

            if (!a.alias.empty())
            {
                if (strncmp(a.alias.c_str(), "0", 1) != 0)
                {
                    _bmol->setAlias(idx, a.alias.c_str());
                }
            }

            if (!a.atom_mapping.empty())
            {
                int val;
                if (sscanf(a.atom_mapping.c_str(), "%d", &val) != 1)
                    throw Error("error parsing atom-atom mapping");
                _bmol->reaction_atom_mapping[idx] = val;
            }

            if (!a.atom_inversion.empty())
            {
                if (strncmp(a.atom_inversion.c_str(), "Inv", 3) == 0)
                    _bmol->reaction_atom_inversion[idx] = 1;
                else if (strncmp(a.atom_inversion.c_str(), "Ret", 3) == 0)
                    _bmol->reaction_atom_inversion[idx] = 2;
            }

            if (!a.atom_exact_change.empty())
            {
                int val;
                if (sscanf(a.atom_exact_change.c_str(), "%d", &val) != 1)
                    throw Error("error parsing atom exact change flag");
                _bmol->reaction_atom_exact_change[idx] = val;
            }
        }
        /*
              if (!a.attorder.empty())
              {
                 int val;
                 if (sscanf(a.attorder.c_str(), "%d", &val) != 1)
                    throw Error("error parsing Attachment order");
                 mol.setRSiteAttachmentOrder(idx, nei_idx - 1, val - 1);
              }
        */
    }

    // Bonds
    bool have_cistrans_notation = false;

    for (XMLElement* elem = handle.FirstChildElement("bondArray").FirstChild().ToElement(); elem; elem = elem->NextSiblingElement())
    {
        if (strncmp(elem->Value(), "bond", 4) != 0)
            continue;
        const char* atom_refs = elem->Attribute("atomRefs2");
        if (atom_refs == 0)
            throw Error("bond without atomRefs2");

        int idx;

        BufferScanner strscan(atom_refs);
        QS_DEF(Array<char>, id);

        strscan.readWord(id, 0);
        int beg = getAtomIdx(id.ptr());
        strscan.skipSpace();
        strscan.readWord(id, 0);
        int end = getAtomIdx(id.ptr());

        if ((beg < 0) || (end < 0))
            continue;

        const char* order = elem->Attribute("order");
        if (order == 0)
            throw Error("bond without an order");

        int order_val;
        {
            if (order[0] == 'A' && order[1] == 0)
                order_val = BOND_AROMATIC;
            else if (order[0] == 'S' && order[1] == 0)
                order_val = BOND_SINGLE;
            else if (order[0] == 'D' && order[1] == 0)
                order_val = BOND_DOUBLE;
            else if (order[0] == 'T' && order[1] == 0)
                order_val = BOND_TRIPLE;
            else if (sscanf(order, "%d", &order_val) != 1)
                throw Error("error parsing order");
        }

        const char* query_type = elem->Attribute("queryType");
        const char* topology = elem->Attribute("topology");

        if (_mol != 0)
        {
            if ((query_type == 0) && (topology == 0))
                idx = _mol->addBond_Silent(beg, end, order_val);
            else
                throw Error("'query type' bonds are allowed only for queries");
        }
        else
        {
            std::unique_ptr<QueryMolecule::Bond> bond;

            if (query_type == 0)
            {
                if (order_val == BOND_SINGLE || order_val == BOND_DOUBLE || order_val == BOND_TRIPLE || order_val == BOND_AROMATIC)
                    bond.reset(new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, order_val));
            }
            else if (strncmp(query_type, "SD", 2) == 0)
                bond.reset(QueryMolecule::Bond::und(QueryMolecule::Bond::nicht(new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_AROMATIC)),
                                                    QueryMolecule::Bond::oder(new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_SINGLE),
                                                                              new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_DOUBLE))));
            else if (strncmp(query_type, "SA", 2) == 0)
                bond.reset(QueryMolecule::Bond::oder(new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_SINGLE),
                                                     new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_AROMATIC)));
            else if (strncmp(query_type, "DA", 2) == 0)
                bond.reset(QueryMolecule::Bond::oder(new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_DOUBLE),
                                                     new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, BOND_AROMATIC)));
            else if (strncmp(query_type, "Any", 3) == 0)
                bond = std::make_unique<QueryMolecule::Bond>();
            else
                throw Error("unknown bond type: %d", order);

            const char* bond_topology = elem->Attribute("topology");
            if (bond_topology != 0)
            {
                if (strncmp(bond_topology, "ring", 4) == 0)
                    bond.reset(QueryMolecule::Bond::und(bond.release(), new QueryMolecule::Bond(QueryMolecule::BOND_TOPOLOGY, TOPOLOGY_RING)));
                else if (strncmp(bond_topology, "chain", 5) == 0)
                    bond.reset(QueryMolecule::Bond::und(bond.release(), new QueryMolecule::Bond(QueryMolecule::BOND_TOPOLOGY, TOPOLOGY_CHAIN)));
                else
                    throw Error("unknown topology: %s", bond_topology);
            }

            idx = _qmol->addBond(beg, end, bond.release());
        }

        int dir = 0;

        XMLElement* bs_elem = elem->FirstChildElement("bondStereo");

        if (bs_elem != 0)
        {
            const char* text = bs_elem->GetText();
            if (text != 0)
            {
                if (text[0] == 'W' && text[1] == 0)
                    dir = BOND_UP;
                if (text[0] == 'H' && text[1] == 0)
                    dir = BOND_DOWN;
                if ((text[0] == 'C' || text[0] == 'T') && text[1] == 0)
                    have_cistrans_notation = true;
            }
            else
            {
                const char* convention = bs_elem->Attribute("convention");
                if (convention != 0)
                {
                    have_cistrans_notation = true;
                }
            }
        }

        if (dir != 0)
            _bmol->setBondDirection(idx, dir);

        const char* brcenter = elem->Attribute("mrvReactingCenter");

        if (brcenter != 0)
        {
            int val;
            if (sscanf(brcenter, "%d", &val) != 1)
                throw Error("error parsing reacting center flag");
            _bmol->reaction_bond_reacting_center[idx] = val;
        }
    }

    // Implicit H counts
    int i, j;

    for (i = _bmol->vertexBegin(); i != _bmol->vertexEnd(); i = _bmol->vertexNext(i))
    {
        int h = total_h_count[i];

        if (h < 0)
            continue;

        const Vertex& vertex = _bmol->getVertex(i);

        for (j = vertex.neiBegin(); j != vertex.neiEnd(); j = vertex.neiNext(j))
            if (_bmol->getAtomNumber(vertex.neiVertex(j)) == ELEM_H)
                h--;

        if (h < 0)
            throw Error("hydrogenCount on atom %d is less than the number of explicit hydrogens");

        if (_mol != 0)
            _mol->setImplicitH(i, h);
    }

    // Query H counts
    if (_qmol != 0)
    {
        for (i = _bmol->vertexBegin(); i != _bmol->vertexEnd(); i = _bmol->vertexNext(i))
        {
            int expl_h = 0;

            if (query_h_count[i] >= 0)
            {
                // count explicit hydrogens
                const Vertex& vertex = _bmol->getVertex(i);

                for (j = vertex.neiBegin(); j != vertex.neiEnd(); j = vertex.neiNext(j))
                {
                    if (_bmol->getAtomNumber(vertex.neiVertex(j)) == ELEM_H)
                        expl_h++;
                }
            }

            if (query_h_count[i] == 0)
            {
                // no hydrogens unless explicitly drawn
                _qmol->resetAtom(i, QueryMolecule::Atom::und(_qmol->releaseAtom(i), new QueryMolecule::Atom(QueryMolecule::ATOM_TOTAL_H, expl_h)));
            }
            else if (query_h_count[i] > 0)
            {
                // (_hcount[k] - 1) or more atoms in addition to explicitly drawn
                // no hydrogens unless explicitly drawn
                _qmol->resetAtom(
                    i, QueryMolecule::Atom::und(_qmol->releaseAtom(i), new QueryMolecule::Atom(QueryMolecule::ATOM_TOTAL_H, expl_h + query_h_count[i], 100)));
            }
        }
    }

    // Tetrahedral stereocenters
    for (XMLElement* elem = handle.FirstChildElement("atomArray").FirstChild().ToElement(); elem; elem = elem->NextSiblingElement())
    {
        const char* id = elem->Attribute("id");

        if (id == 0)
            throw Error("atom without an id");

        int idx = getAtomIdx(id);

        XMLElement* ap_elem = elem->FirstChildElement("atomParity");

        if (ap_elem == 0)
            continue;

        const char* ap_text = ap_elem->GetText();

        if (ap_text == 0)
            continue;

        int val;
        if (sscanf(ap_text, "%d", &val) != 1)
            throw Error("error parsing atomParity");

        const char* refs4 = ap_elem->Attribute("atomRefs4");

        if (refs4 != 0)
        {
            BufferScanner strscan(refs4);
            QS_DEF(Array<char>, a_id);
            int pyramid[4];

            for (int k = 0; k < 4; k++)
            {
                strscan.skipSpace();
                strscan.readWord(a_id, 0);
                pyramid[k] = getAtomIdx(a_id.ptr());
                if (pyramid[k] == idx)
                    pyramid[k] = -1;
            }

            if (val < 0)
                std::swap(pyramid[0], pyramid[1]);

            MoleculeStereocenters::moveMinimalToEnd(pyramid);

            _bmol->addStereocenters(idx, MoleculeStereocenters::ATOM_ABS, 0, pyramid);
        }
    }

    if (_bmol->stereocenters.size() == 0 && BaseMolecule::hasCoord(*_bmol))
    {
        std::vector<int> sensible_bond_orientations;
        sensible_bond_orientations.resize(_bmol->edgeEnd(), 0);
        _bmol->buildFromBondsStereocenters(stereochemistry_options, sensible_bond_orientations.data());
        if (!stereochemistry_options.ignore_errors)
            for (i = 0; i < _bmol->vertexCount(); i++)
                if (_bmol->getBondDirection(i) > 0 && !sensible_bond_orientations[i])
                    throw Error("direction of bond #%d makes no sense", i);
    }

    // Cis-trans stuff
    if (have_cistrans_notation)
    {
        int bond_idx = -1;

        for (XMLElement* elem = handle.FirstChildElement("bondArray").FirstChild().ToElement(); elem; elem = elem->NextSiblingElement())
        {
            if (strncmp(elem->Value(), "bond", 4) != 0)
                continue;
            bond_idx++;
            XMLElement* bs_elem = elem->FirstChildElement("bondStereo");

            if (bs_elem == 0)
                continue;

            const char* convention = bs_elem->Attribute("convention");
            if (convention != 0)
            {
                const char* convention_value = bs_elem->Attribute("conventionValue");
                if (convention_value != 0)
                {
                    if (strncmp(convention, "MDL", 3) == 0)
                    {
                        int val;
                        if (sscanf(convention_value, "%d", &val) != 1)
                            throw Error("error conventionValue attribute");
                        if (val == 3)
                        {
                            _bmol->cis_trans.ignore(bond_idx);
                        }
                    }
                }
            }

            const char* text = bs_elem->GetText();

            if (text == 0)
                continue;

            int parity;

            if (text[0] == 'C' && text[1] == 0)
                parity = MoleculeCisTrans::CIS;
            else if (text[0] == 'T' && text[1] == 0)
                parity = MoleculeCisTrans::TRANS;
            else
                continue;

            const char* atom_refs = bs_elem->Attribute("atomRefs4");
            // If there are only one substituent then atomRefs4 cano be omitted
            bool has_subst = atom_refs != nullptr;

            int substituents[4];

            if (!MoleculeCisTrans::isGeomStereoBond(*_bmol, bond_idx, substituents, false))
                throw Error("cis-trans notation on a non cis-trans bond #%d", bond_idx);

            if (!MoleculeCisTrans::sortSubstituents(*_bmol, substituents, 0))
                throw Error("cis-trans notation on a non cis-trans bond #%d", bond_idx);

            if (has_subst)
            {
                BufferScanner strscan(atom_refs);
                QS_DEF(Array<char>, id);
                int refs[4] = {-1, -1, -1, -1};

                for (int k = 0; k < 4; k++)
                {
                    strscan.skipSpace();
                    strscan.readWord(id, 0);

                    refs[k] = getAtomIdx(id.ptr());
                }

                const Edge& edge = _bmol->getEdge(bond_idx);

                if (refs[1] == edge.beg && refs[2] == edge.end)
                    ;
                else if (refs[1] == edge.end && refs[2] == edge.beg)
                {
                    std::swap(refs[0], refs[3]);
                    std::swap(refs[1], refs[2]);
                }
                else
                    throw Error("atomRefs4 in bondStereo do not match the bond ends");

                bool invert = false;

                if (refs[0] == substituents[0])
                    ;
                else if (refs[0] == substituents[1])
                    invert = !invert;
                else
                    throw Error("atomRefs4 in bondStereo do not match the substituents");

                if (refs[3] == substituents[2])
                    ;
                else if (refs[3] == substituents[3])
                    invert = !invert;
                else
                    throw Error("atomRefs4 in bondStereo do not match the substituents");

                if (invert)
                    parity = 3 - parity;
            }

            _bmol->cis_trans.add(bond_idx, substituents, parity);
        }
    }
    else if (BaseMolecule::hasCoord(*_bmol))
        _bmol->buildCisTrans(0);

    // Sgroups

    for (XMLElement* elem = handle.FirstChild().ToElement(); elem; elem = elem->NextSiblingElement())
    {
        if (strncmp(elem->Value(), "molecule", 8) != 0)
            continue;
        _loadSGroupElement(elem, atoms_id, 0);
    }
}