void RSmilesLoader::_loadReaction()

in core/indigo-core/reaction/src/rsmiles_loader.cpp [86:592]


void RSmilesLoader::_loadReaction()
{
    _brxn->clear();

    std::unique_ptr<BaseMolecule> mols[3];
    std::unique_ptr<BaseMolecule>& rcnt = mols[0];
    std::unique_ptr<BaseMolecule>& ctlt = mols[1];
    std::unique_ptr<BaseMolecule>& prod = mols[2];
    std::list<std::unordered_set<int>> mols_extNeibs[3];
    std::list<std::unordered_set<int>>& rcnt_extNeibs = mols_extNeibs[0];
    std::list<std::unordered_set<int>>& ctlt_extNeibs = mols_extNeibs[1];
    std::list<std::unordered_set<int>>& prod_extNeibs = mols_extNeibs[2];

    QS_DEF(Array<int>, rcnt_aam);
    QS_DEF(Array<int>, ctlt_aam);
    QS_DEF(Array<int>, prod_aam);
    QS_DEF(Array<int>, rcnt_aam_ignorable);
    QS_DEF(Array<int>, prod_aam_ignorable);
    QS_DEF(Array<char>, buf);
    Array<int>* aams[] = {&rcnt_aam, &ctlt_aam, &prod_aam};
    Array<int>* ignorable_aams[] = {&rcnt_aam_ignorable, 0, &prod_aam_ignorable};

    // read the reactants
    buf.clear();
    while (1)
    {
        char c = _scanner.readChar();

        if (c == '>')
            break;
        buf.push(c);
    }

    BufferScanner r_scanner(buf);
    SmilesLoader r_loader(r_scanner);

    r_loader.ignore_closing_bond_direction_mismatch = ignore_closing_bond_direction_mismatch;
    r_loader.inside_rsmiles = true;
    r_loader.ignorable_aam = &rcnt_aam_ignorable;
    r_loader.smarts_mode = smarts_mode;
    r_loader.ignore_cistrans_errors = ignore_cistrans_errors;
    r_loader.stereochemistry_options = stereochemistry_options;
    r_loader.ignore_bad_valence = ignore_bad_valence;

    if (_rxn != 0)
    {
        rcnt = std::make_unique<Molecule>();
        r_loader.loadMolecule(static_cast<Molecule&>(*rcnt));
    }
    else
    {
        rcnt = std::make_unique<QueryMolecule>();
        r_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*rcnt));
        static_cast<QueryMolecule&>(*rcnt).getComponentNeighbors(rcnt_extNeibs);
    }
    rcnt_aam.copy(rcnt->reaction_atom_mapping);

    // read the catalysts (agents)
    buf.clear();
    while (1)
    {
        char c = _scanner.readChar();

        if (c == '>')
            break;
        buf.push(c);
    }

    BufferScanner c_scanner(buf);
    SmilesLoader c_loader(c_scanner);

    c_loader.ignore_closing_bond_direction_mismatch = ignore_closing_bond_direction_mismatch;
    c_loader.inside_rsmiles = true;
    c_loader.smarts_mode = smarts_mode;
    c_loader.ignore_cistrans_errors = ignore_cistrans_errors;
    c_loader.stereochemistry_options = stereochemistry_options;
    c_loader.ignore_bad_valence = ignore_bad_valence;

    if (_rxn != 0)
    {
        ctlt = std::make_unique<Molecule>();
        c_loader.loadMolecule(static_cast<Molecule&>(*ctlt));
    }
    else
    {
        ctlt = std::make_unique<QueryMolecule>();
        c_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*ctlt));
        static_cast<QueryMolecule&>(*ctlt).getComponentNeighbors(ctlt_extNeibs);
    }
    ctlt_aam.copy(ctlt->reaction_atom_mapping);

    bool vbar = false;

    // read the products
    buf.clear();
    while (!_scanner.isEOF())
    {
        char c = _scanner.readChar();

        if (c == '|')
        {
            vbar = true;
            break;
        }
        buf.push(c);
    }

    BufferScanner p_scanner(buf);
    SmilesLoader p_loader(p_scanner);

    p_loader.ignore_closing_bond_direction_mismatch = ignore_closing_bond_direction_mismatch;
    p_loader.inside_rsmiles = true;
    p_loader.ignorable_aam = &prod_aam_ignorable;
    p_loader.smarts_mode = smarts_mode;
    p_loader.ignore_cistrans_errors = ignore_cistrans_errors;
    p_loader.stereochemistry_options = stereochemistry_options;
    p_loader.ignore_bad_valence = ignore_bad_valence;

    if (_rxn != 0)
    {
        prod = std::make_unique<Molecule>();
        p_loader.loadMolecule(static_cast<Molecule&>(*prod));
    }
    else
    {
        prod = std::make_unique<QueryMolecule>();
        p_loader.loadQueryMolecule(static_cast<QueryMolecule&>(*prod));
        static_cast<QueryMolecule&>(*prod).getComponentNeighbors(prod_extNeibs);
    }
    prod_aam.copy(prod->reaction_atom_mapping);

    QS_DEF(Array<int>, r_fragments);
    QS_DEF(Array<int>, c_fragments);
    QS_DEF(Array<int>, p_fragments);
    Array<int>* fragments[] = {&r_fragments, &c_fragments, &p_fragments};

    r_fragments.clear_resize(rcnt->countComponents(rcnt_extNeibs));
    c_fragments.clear_resize(ctlt->countComponents(ctlt_extNeibs));
    p_fragments.clear_resize(prod->countComponents(prod_extNeibs));

    for (int i = 0; i < r_fragments.size(); i++)
        r_fragments[i] = i;

    for (int i = 0; i < c_fragments.size(); i++)
        c_fragments[i] = i;

    for (int i = 0; i < p_fragments.size(); i++)
        p_fragments[i] = i;

    bool have_highlighting = false;

    QS_DEF(Array<int>, hl_atoms);
    QS_DEF(Array<int>, hl_bonds);

    hl_atoms.clear_resize(rcnt->vertexCount() + ctlt->vertexCount() + prod->vertexCount());
    hl_bonds.clear_resize(rcnt->edgeCount() + ctlt->edgeCount() + prod->edgeCount());
    hl_atoms.zerofill();
    hl_bonds.zerofill();

    if (vbar)
    {
        BaseMolecule* stereo[] = {rcnt.get(), ctlt.get(), prod.get()};

        while (1)
        {
            char c = _scanner.readChar();

            if (c == '|')
                break;

            if (c == 'w')
            {
                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'w'");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());
                    stereo[group]->addStereocenters(idx, MoleculeStereocenters::ATOM_ANY, 0, false);

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == 'a')
            {
                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'a'");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());
                    stereo[group]->stereocenters.setType(idx, MoleculeStereocenters::ATOM_ABS, 0);

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == 'o')
            {
                int groupno = _scanner.readUnsigned();

                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'o'");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());
                    stereo[group]->stereocenters.setType(idx, MoleculeStereocenters::ATOM_OR, groupno);

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == '&')
            {
                int groupno = _scanner.readUnsigned();

                if (_scanner.readChar() != ':')
                    throw Error("colon expected after '&'");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());
                    stereo[group]->stereocenters.setType(idx, MoleculeStereocenters::ATOM_AND, groupno);

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == '^')
            {
                int rad = _scanner.readIntFix(1);
                int radical;

                if (rad == 1)
                    radical = RADICAL_DOUBLET;
                else if (rad == 3)
                    radical = RADICAL_SINGLET;
                else if (rad == 4)
                    radical = RADICAL_TRIPLET;
                else
                    throw Error("unsupported radical number: %d", rad);

                if (_scanner.readChar() != ':')
                    throw Error("colon expected after radical number");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());

                    if (_rxn != 0)
                        (dynamic_cast<Molecule&>(*mols[group])).setAtomRadical(idx, radical);
                    else
                    {
                        QueryMolecule& qmol = dynamic_cast<QueryMolecule&>(*mols[group]);
                        qmol.resetAtom(idx, (QueryMolecule::Atom*)QueryMolecule::Atom::und(qmol.releaseAtom(idx),
                                                                                           new QueryMolecule::Atom(QueryMolecule::ATOM_RADICAL, radical)));
                    }

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == 'f')
            {
                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'f'");

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    while (_scanner.lookNext() == '.')
                    {
                        _scanner.skip(1);

                        int idx1 = idx;
                        int index_in_group = _scanner.readUnsigned();
                        int group = _selectGroupByPair(idx1, index_in_group, r_fragments.size(), c_fragments.size(), p_fragments.size());
                        (*fragments[group])[index_in_group] = idx1;
                    }

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
            else if (c == '$')
            {
                int k = rcnt->vertexCount() + ctlt->vertexCount() + prod->vertexCount();
                QS_DEF(Array<char>, label);

                for (int i = 0; i < k; i++)
                {
                    label.clear();

                    while (1)
                    {
                        if (_scanner.isEOF())
                            throw Error("end of input while reading $...$ block");
                        c = _scanner.readChar();
                        if (c == ';' || c == '$')
                            break;
                        label.push(c);
                    }
                    if (c == '$' && i != k - 1)
                        throw Error("only %d atoms found in pseudo-atoms $...$ block", i + 1);
                    if (label.size() > 0)
                    {
                        label.push(0);

                        int idx = i;
                        int group = _selectGroup(idx, rcnt->vertexCount(), ctlt->vertexCount(), prod->vertexCount());
                        int rnum;

                        if (label.size() > 3 && label[0] == '_' && label[1] == 'R' && sscanf(label.ptr() + 2, "%d", &rnum) == 1)
                        {
                            // ChemAxon's Extended SMILES notation for R-sites
                            if (_qrxn != 0)
                            {
                                QueryMolecule& qmol = dynamic_cast<QueryMolecule&>(*mols[group]);
                                qmol.resetAtom(idx, new QueryMolecule::Atom(QueryMolecule::ATOM_RSITE, 0));
                            }
                            mols[group]->allowRGroupOnRSite(idx, rnum);
                        }
                        else
                        {
                            if (_rxn != 0)
                            {
                                Molecule& mol = dynamic_cast<Molecule&>(*mols[group]);
                                const auto atomNumber = mol.getAtomNumber(idx);
                                if (ELEM_MIN < atomNumber && atomNumber < ELEM_MAX)
                                {
                                    mol.setAlias(idx, label.ptr());
                                }
                                else
                                {
                                    mol.setPseudoAtom(idx, label.ptr());
                                }
                            }
                            else
                            {
                                QueryMolecule& qmol = dynamic_cast<QueryMolecule&>(*mols[group]);
                                const auto atomNumber = qmol.getAtomNumber(idx);
                                if (ELEM_MIN < atomNumber && atomNumber < ELEM_MAX)
                                {
                                    qmol.setAlias(idx, label.ptr());
                                }
                                else
                                {
                                    qmol.resetAtom(idx, (QueryMolecule::Atom*)QueryMolecule::Atom::und(
                                                            qmol.releaseAtom(idx), new QueryMolecule::Atom(QueryMolecule::ATOM_PSEUDO, label.ptr())));
                                }
                            }
                        }
                    }
                }
            }
            else if (c == 'h')
            {
                have_highlighting = true;

                c = _scanner.readChar();

                int a = false;

                if (c == 'a')
                    a = true;
                else if (c != 'b')
                    throw Error("expected 'a' or 'b' after 'h', got '%c'", c);

                if (_scanner.readChar() != ':')
                    throw Error("colon expected after 'h%c'", a ? 'a' : 'b');

                while (isdigit(_scanner.lookNext()))
                {
                    int idx = _scanner.readUnsigned();

                    if (a)
                        hl_atoms[idx] = 1;
                    else
                        hl_bonds[idx] = 1;

                    if (_scanner.lookNext() == ',')
                        _scanner.skip(1);
                }
            }
        }
    }

    // Read name
    Scanner* scanner_for_name;
    if (vbar)
        scanner_for_name = &_scanner;
    else
        scanner_for_name = &p_scanner;

    scanner_for_name->skipSpace();
    if (!scanner_for_name->isEOF())
        scanner_for_name->readLine(_brxn->name, true);

    std::unique_ptr<BaseMolecule> mol;
    QS_DEF(Array<int>, aam);
    QS_DEF(Array<int>, ignorable_aam);
    QS_DEF(Array<int>, mapping);
    QS_DEF(Array<int>, hl_atoms_frag);
    QS_DEF(Array<int>, hl_bonds_frag);

    if (_rxn != 0)
        mol = std::make_unique<Molecule>();
    else
        mol = std::make_unique<QueryMolecule>();

    for (int v = 0; v < 3; ++v)
    {
        for (int i = 0; i < fragments[v]->size(); i++)
        {
            if ((*fragments[v])[i] == -1)
                continue;

            mol->clear();
            aam.clear();
            ignorable_aam.clear();
            hl_atoms_frag.clear();
            hl_bonds_frag.clear();

            for (int j = i; j < fragments[v]->size(); j++)
            {
                std::unique_ptr<BaseMolecule> fragment;

                if (_rxn != 0)
                    fragment = std::make_unique<Molecule>();
                else
                    fragment = std::make_unique<QueryMolecule>();

                if ((*fragments[v])[j] == i)
                {
                    (*fragments[v])[j] = -1;
                    Filter filt(mols[v]->getDecomposition().ptr(), Filter::EQ, j);
                    fragment->makeSubmolecule(*mols[v], filt, &mapping, 0);

                    mol->mergeWithMolecule(*fragment, 0);

                    for (int k = 0; k < fragment->vertexCount(); k++)
                    {
                        aam.push((*aams[v])[mapping[k]]);
                        if (ignorable_aams[v] != 0)
                            ignorable_aam.push((*ignorable_aams[v])[mapping[k]]);

                        int idx = mapping[k];

                        for (int w = 0; w < v; w++)
                            idx += mols[w]->vertexCount();

                        hl_atoms_frag.push(hl_atoms[idx]);
                    }

                    for (int k = 0; k < fragment->edgeCount(); k++)
                    {
                        const Edge& edge = fragment->getEdge(k);

                        int idx = mols[v]->findEdgeIndex(mapping[edge.beg], mapping[edge.end]);

                        if (idx < 0)
                            throw Error("internal: can not find edge");

                        for (int w = 0; w < v; w++)
                            idx += mols[w]->edgeCount();

                        hl_bonds_frag.push(hl_bonds[idx]);
                    }
                }
            }

            int idx = 0; // TODO: investigate right value for default idx
            if (v == 0)
                idx = _brxn->addReactantCopy(*mol, 0, 0);
            else if (v == 1)
                idx = _brxn->addCatalystCopy(*mol, 0, 0);
            else if (v == 2)
                idx = _brxn->addProductCopy(*mol, 0, 0);

            _brxn->getAAMArray(idx).copy(aam);
            if (_qrxn != 0)
                _qrxn->getIgnorableAAMArray(idx).copy(ignorable_aam);

            if (have_highlighting)
            {
                Filter vfilter(hl_atoms_frag.ptr(), Filter::NEQ, 0);
                Filter efilter(hl_bonds_frag.ptr(), Filter::NEQ, 0);

                _brxn->getBaseMolecule(idx).highlightAtoms(vfilter);
                _brxn->getBaseMolecule(idx).highlightBonds(efilter);
            }
        }
    }
}