core/indigo-core/molecule/src/molecule_dearom.cpp (1,280 lines of code) (raw):
/****************************************************************************
* Copyright (C) from 2009 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#include "molecule/molecule_dearom.h"
#include "base_c/bitarray.h"
#include "base_cpp/output.h"
#include "base_cpp/scanner.h"
#include "graph/filter.h"
#include "molecule/elements.h"
#include "molecule/molecule.h"
#include "molecule/molecule_arom.h"
#include "molecule/query_molecule.h"
using namespace indigo;
// Exceptions
IMPL_EXCEPTION(indigo, DearomatizationException, "dearomatization");
IMPL_EXCEPTION2(indigo, NonUniqueDearomatizationException, DearomatizationException, "non-unique dearomatization");
//
// Indigo aromaticiy model remarks.
// C1=CC2=CC=CC=CC2=C1 is aromatized into c1:c2-c(:c:c:c:c:c:2):c:c:1 but not into
// c1cc2cccccc2c1 because single bond is not aromatic.
//
// O=C1C=CC(=O)C2=C1SC=CS2 is not aromatized to O=c1ccc(=O)c2sccsc12 because
// of double bond inside cycle that is prohibited. Big cycle has 4n+2 electrons
// but is not treated as aromatic because of this double bond.
//
thread_local int _dearomatizationParams = Dearomatizer::PARAMS_SAVE_ONE_DEAROMATIZATION;
CP_DEF(Dearomatizer);
Dearomatizer::Dearomatizer(BaseMolecule& molecule, const int* atom_external_conn, const AromaticityOptions& options)
: _graphMatching(molecule), _molecule(molecule), _options(options), _aromaticGroups(molecule, options.aromatize_skip_superatoms), CP_INIT,
TL_CP_GET(_aromaticGroupData),
// TL_CP_GET(_edgesFixed),
// TL_CP_GET(_verticesFixed),
TL_CP_GET(_submoleculeMapping)
{
_isQueryMolecule = _molecule.isQueryMolecule();
_edgesFixed.resize(_molecule.edgeEnd());
_verticesFixed.resize(_molecule.vertexEnd());
_verticesFixed.zeroFill();
_connectivityGroups = _aromaticGroups.detectAromaticGroups(atom_external_conn);
_initVertices();
_initEdges();
_graphMatching.setFixedInfo(&_edgesFixed, &_verticesFixed);
}
Dearomatizer::~Dearomatizer()
{
}
void Dearomatizer::setDearomatizationParams(int params)
{
_dearomatizationParams = params;
}
// Enumerate all dearomatizations for all connectivity groups
void Dearomatizer::enumerateDearomatizations(DearomatizationsStorage& dearomatizations)
{
dearomatizations.clear();
if (_connectivityGroups == 0)
return;
_dearomatizations = &dearomatizations;
Molecule submolecule;
QueryMolecule qsubmolecule;
dearomatizations.setGroupsCount(_connectivityGroups);
dearomatizations.setDearomatizationParams(static_cast<byte>(_dearomatizationParams));
_aromaticGroups.constructGroups(dearomatizations, true);
for (int group = 0; group < _connectivityGroups; group++)
{
_activeGroup = group;
if (_isQueryMolecule)
_prepareGroup(group, qsubmolecule);
else
_prepareGroup(group, submolecule);
GrayCodesEnumerator grayCodes(_aromaticGroupData.heteroAtoms.size(), true);
do
{
if (_graphMatching.findMatching())
if (_isQueryMolecule)
_processMatching(qsubmolecule, group, grayCodes.getCode());
else
_processMatching(submolecule, group, grayCodes.getCode());
grayCodes.next();
if (!grayCodes.isDone())
{
int heteroAtomToInvert = _aromaticGroupData.heteroAtoms[grayCodes.getBitChangeIndex()];
_fixHeteratom(heteroAtomToInvert, !_verticesFixed.get(heteroAtomToInvert));
}
} while (!grayCodes.isDone());
}
}
void Dearomatizer::_fixHeteratom(int atom_idx, bool /* toFix */)
{
if (!_verticesFixed.get(atom_idx))
{
if (_graphMatching.isVertexInMatching(atom_idx))
_graphMatching.removeVertexFromMatching(atom_idx);
_verticesFixed.set(atom_idx);
}
else
_verticesFixed.reset(atom_idx);
return;
}
void Dearomatizer::_initVertices(void)
{
for (int v_idx = _molecule.vertexBegin(); v_idx < _molecule.vertexEnd(); v_idx = _molecule.vertexNext(v_idx))
{
if (_molecule.getAtomAromaticity(v_idx) == ATOM_ALIPHATIC)
_verticesFixed.set(v_idx);
}
}
// Find all aromatic bonds
void Dearomatizer::_initEdges(void)
{
for (int e_idx = _molecule.edgeBegin(); e_idx < _molecule.edgeEnd(); e_idx = _molecule.edgeNext(e_idx))
{
bool non_aromatic = _molecule.getBondOrder(e_idx) != BOND_AROMATIC;
if (non_aromatic && _isQueryMolecule)
non_aromatic = !_molecule.asQueryMolecule().possibleAromaticBond(e_idx);
_edgesFixed.set(e_idx, non_aromatic);
}
}
void Dearomatizer::_enumerateMatching(void)
{
// Find strong edge in alternating circle
const Edge* edge = 0;
int e_idx = -1;
bool found = false;
for (int i = 0; i < _aromaticGroupData.bonds.size(); i++)
{
e_idx = _aromaticGroupData.bonds[i];
if (!_edgesFixed.get(e_idx) && _graphMatching.isEdgeMatching(e_idx))
{
edge = &(_molecule.getEdge(e_idx));
if (_graphMatching.findAlternatingPath(edge->beg, edge->end, false, false))
{
found = true;
break;
}
}
}
if (!found)
{
_handleMatching();
return;
}
const int MAX_PATH_SIZE = 100;
int pathSize = _graphMatching.getPathSize();
int path[MAX_PATH_SIZE];
memcpy(path, _graphMatching.getPath(), sizeof(int) * pathSize);
// Enumerate all matching with this strong edge
_verticesFixed.set(edge->beg);
_verticesFixed.set(edge->end);
_enumerateMatching();
_verticesFixed.reset(edge->beg);
_verticesFixed.reset(edge->end);
// Enumerate all matching without this strong edge
_graphMatching.setPath(path, pathSize);
_graphMatching.setEdgeMatching(e_idx, false);
_graphMatching.processPath();
_edgesFixed.set(e_idx);
_enumerateMatching();
_edgesFixed.reset(e_idx);
_graphMatching.setPath(path, pathSize);
_graphMatching.processPath();
_graphMatching.setEdgeMatching(e_idx, true);
}
void Dearomatizer::_handleMatching(void)
{
// Add dearomatizations
_dearomatizations->addGroupDearomatization(_activeGroup, _graphMatching.getEdgesState());
}
static void dearomatizeQueryBond(QueryMolecule& qmol, int idx, int order)
{
qmol.resetBond(idx, new QueryMolecule::Bond(QueryMolecule::BOND_ORDER, order));
// Clear 'Aromatic' at bond vertex. Disabled for #1472.
// Edge edg = qmol.getEdge(idx);
// if (qmol.getAtom(edg.beg).hasConstraintWithValue(QueryMolecule::ATOM_AROMATICITY, ATOM_AROMATIC))
// qmol.getAtom(edg.beg).removeConstraints(QueryMolecule::ATOM_AROMATICITY);
// if (qmol.getAtom(edg.end).hasConstraintWithValue(QueryMolecule::ATOM_AROMATICITY, ATOM_AROMATIC))
// qmol.getAtom(edg.end).removeConstraints(QueryMolecule::ATOM_AROMATICITY);
}
void Dearomatizer::_processMatching(BaseMolecule& submolecule, int group, const byte* hetroAtomsState)
{
bool isQuery = submolecule.isQueryMolecule();
// Copy bonds
for (int e_idx = submolecule.edgeBegin(); e_idx < submolecule.edgeEnd(); e_idx = submolecule.edgeNext(e_idx))
{
if (submolecule.getBondTopology(e_idx) != TOPOLOGY_RING)
// Do not change any bond orders that are not in rings
continue;
const Edge& edge = submolecule.getEdge(e_idx);
int supIdx = _molecule.findEdgeIndex(_submoleculeMapping[edge.beg], _submoleculeMapping[edge.end]);
if (_graphMatching.isEdgeMatching(supIdx))
if (isQuery)
dearomatizeQueryBond(submolecule.asQueryMolecule(), e_idx, BOND_DOUBLE);
else
submolecule.asMolecule().setBondOrder(e_idx, BOND_DOUBLE);
else if (isQuery)
dearomatizeQueryBond(submolecule.asQueryMolecule(), e_idx, BOND_SINGLE);
else
submolecule.asMolecule().setBondOrder(e_idx, BOND_SINGLE);
}
// Check aromaticity
bool valid = true;
if (_options.dearomatize_check)
{
// Check configuration only if antiaromaticity is not allowed
// For example structure c1ccc1 is not aromatic but antiaromatic, and
// kekulized form is C1=CC=C1
// Dearomatization without verification can be used for finding kekulized form
// that is not necessary aromatic
if (isQuery)
QueryMoleculeAromatizer::aromatizeBonds(submolecule.asQueryMolecule(), _options);
else
MoleculeAromatizer::aromatizeBonds(submolecule.asMolecule(), _options);
for (int e_idx = submolecule.edgeBegin(); e_idx < submolecule.edgeEnd(); e_idx = submolecule.edgeNext(e_idx))
{
if (submolecule.getBondTopology(e_idx) == TOPOLOGY_RING && submolecule.getBondOrder(e_idx) != BOND_AROMATIC)
{
valid = false;
break;
}
}
}
if (valid)
{
if (_dearomatizationParams == PARAMS_SAVE_ALL_DEAROMATIZATIONS)
// Enumerate all equivalent dearomatizations
_enumerateMatching();
else if (_dearomatizationParams == PARAMS_SAVE_ONE_DEAROMATIZATION)
_handleMatching();
else if (_dearomatizationParams == PARAMS_SAVE_JUST_HETERATOMS)
_dearomatizations->addGroupHeteroAtomsState(group, hetroAtomsState);
}
}
void Dearomatizer::_prepareGroup(int group, BaseMolecule& submolecule)
{
_aromaticGroups.getGroupData(group, DearomatizationsGroups::GET_VERTICES_FILTER | DearomatizationsGroups::GET_HETERATOMS_INDICES, &_aromaticGroupData);
Filter filter(_aromaticGroupData.verticesFilter.ptr(), Filter::EQ, 1);
submolecule.makeSubmolecule(_molecule, filter, &_submoleculeMapping, NULL, SKIP_ALL);
// Remove non-aromatic bonds
for (int e_idx = submolecule.edgeBegin(); e_idx < submolecule.edgeEnd(); e_idx = submolecule.edgeNext(e_idx))
{
// Keep double bonds too
if (submolecule.getBondOrder(e_idx) == BOND_SINGLE)
submolecule.removeEdge(e_idx);
}
for (int i = 0; i < _aromaticGroupData.vertices.size(); i++)
{
int v_idx = _aromaticGroupData.vertices[i];
if (!_aromaticGroups.isAcceptDoubleBond(v_idx))
_verticesFixed.set(v_idx);
else
_verticesFixed.reset(v_idx);
}
for (int i = 0; i < _aromaticGroupData.heteroAtoms.size(); i++)
{
int hetero_idx = _aromaticGroupData.heteroAtoms[i];
_verticesFixed.set(hetero_idx);
}
_graphMatching.reset();
_graphMatching.setEdgesMappingPtr(_aromaticGroupData.bondsInvMapping.ptr());
_graphMatching.setVerticesSetPtr(_aromaticGroupData.vertices.ptr(), _aromaticGroupData.vertices.size());
}
//
// Dearomatizer::DearomatizerGraphMatching
//
bool Dearomatizer::GraphMatchingFixed::checkVertex(int v_idx)
{
return !_verticesFixed->get(v_idx);
}
bool Dearomatizer::GraphMatchingFixed::checkEdge(int e_idx)
{
return !_edgesFixed->get(e_idx);
}
void Dearomatizer::GraphMatchingFixed::setFixedInfo(const Dbitset* edgesFixed, const Dbitset* verticesFixed)
{
_edgesFixed = edgesFixed;
_verticesFixed = verticesFixed;
}
Dearomatizer::GraphMatchingFixed::GraphMatchingFixed(BaseMolecule& molecule) : GraphPerfectMatching(molecule, USE_VERTICES_SET | USE_EDGES_MAPPING)
{
}
//
// Dearomatizations
//
void DearomatizationsStorage::clear(void)
{
_heteroAtomsStateArray.clear();
_aromaticGroups.clear();
clearIndices();
clearBondsState();
_dearomParams = Dearomatizer::PARAMS_NO_DEAROMATIZATIONS;
}
void DearomatizationsStorage::clearIndices(void)
{
_aromBondsArray.clear();
_heteroAtomsIndicesArray.clear();
}
void DearomatizationsStorage::clearBondsState(void)
{
_dearomBondsStateArray.clear();
for (int i = 0; i < _aromaticGroups.size(); i++)
{
_aromaticGroups[i].dearomBondsState.count = 0;
_aromaticGroups[i].dearomBondsState.offset = 0;
}
}
void DearomatizationsStorage::setGroupsCount(int groupsCount)
{
_aromaticGroups.resize(groupsCount);
_aromaticGroups.zerofill();
}
void DearomatizationsStorage::setGroup(int group, int boundsCount, const int* bondsPtr, int heteroAtomsCount, const int* hetroAtoms)
{
_aromaticGroups[group].aromBondsIndices.count = boundsCount;
_aromaticGroups[group].aromBondsIndices.offset = _aromBondsArray.size();
if (_dearomParams == Dearomatizer::PARAMS_SAVE_JUST_HETERATOMS)
{
_aromaticGroups[group].heteroAtomsIndices.count = heteroAtomsCount;
_aromaticGroups[group].heteroAtomsIndices.offset = _heteroAtomsIndicesArray.size();
for (int i = 0; i < heteroAtomsCount; i++)
_heteroAtomsIndicesArray.push(hetroAtoms[i]);
}
else
{
_aromaticGroups[group].heteroAtomsIndices.count = 0;
_aromaticGroups[group].heteroAtomsIndices.offset = _heteroAtomsIndicesArray.size();
}
for (int i = 0; i < boundsCount; i++)
_aromBondsArray.push(bondsPtr[i]);
}
void DearomatizationsStorage::addGroupDearomatization(int group, const byte* dearomBondsState)
{
// Check group
int dearomStateSize = bitGetSize(_aromaticGroups[group].aromBondsIndices.count);
int expectedOffset = _dearomBondsStateArray.size() - dearomStateSize * _aromaticGroups[group].dearomBondsState.count;
if (_aromaticGroups[group].dearomBondsState.count != 0 && _aromaticGroups[group].dearomBondsState.offset != expectedOffset)
throw Error("Dearomatizations::addGroupDearomatization: unable to add dearomatization");
if (_aromaticGroups[group].dearomBondsState.count == 0)
_aromaticGroups[group].dearomBondsState.offset = _dearomBondsStateArray.size();
// Add dearomatization to group
for (int i = 0; i < dearomStateSize; i++)
_dearomBondsStateArray.push(dearomBondsState[i]);
_aromaticGroups[group].dearomBondsState.count++;
}
void DearomatizationsStorage::addGroupHeteroAtomsState(int group, const byte* heteroAtomsState)
{
// Check group
int heteroAtomsSize = bitGetSize(_aromaticGroups[group].heteroAtomsIndices.count);
int expectedOffset = _heteroAtomsStateArray.size() - heteroAtomsSize * _aromaticGroups[group].heteroAtomsState.count;
if (_aromaticGroups[group].heteroAtomsState.count != 0 && _aromaticGroups[group].heteroAtomsState.offset != expectedOffset)
throw Error("Dearomatizations::addGroupHeteroAtomsState: unable to add heteroatoms state");
if (_aromaticGroups[group].heteroAtomsState.count == 0)
_aromaticGroups[group].heteroAtomsState.offset = _heteroAtomsStateArray.size();
for (int i = 0; i < heteroAtomsSize; i++)
_heteroAtomsStateArray.push(heteroAtomsState[i]);
_aromaticGroups[group].heteroAtomsState.count++;
}
// Bonds state for dearomatization
int DearomatizationsStorage::getGroupDearomatizationsCount(int group) const
{
return _aromaticGroups[group].dearomBondsState.count;
}
byte* DearomatizationsStorage::getGroupDearomatization(int group, int dearomatizationIndex)
{
int offset = _aromaticGroups[group].dearomBondsState.offset + dearomatizationIndex * bitGetSize(_aromaticGroups[group].aromBondsIndices.count);
if (offset >= _dearomBondsStateArray.size())
return 0;
return &_dearomBondsStateArray[offset];
}
const int* DearomatizationsStorage::getGroupBonds(int group) const
{
int offset = _aromaticGroups[group].aromBondsIndices.offset;
if (offset >= _aromBondsArray.size())
return 0;
return &_aromBondsArray[offset];
}
int DearomatizationsStorage::getGroupBondsCount(int group) const
{
return _aromaticGroups[group].aromBondsIndices.count;
}
int DearomatizationsStorage::getGroupsCount(void) const
{
return _aromaticGroups.size();
}
// Heteroatoms
int DearomatizationsStorage::getGroupHeterAtomsStateCount(int group) const
{
return _aromaticGroups[group].heteroAtomsState.count;
}
const byte* DearomatizationsStorage::getGroupHeterAtomsState(int group, int index) const
{
int offset = _aromaticGroups[group].heteroAtomsState.offset + index * bitGetSize(_aromaticGroups[group].heteroAtomsIndices.count);
return _heteroAtomsStateArray.ptr() + offset;
}
const int* DearomatizationsStorage::getGroupHeteroAtoms(int group) const
{
return _heteroAtomsIndicesArray.ptr() + _aromaticGroups[group].heteroAtomsIndices.offset;
}
int DearomatizationsStorage::getGroupHeteroAtomsCount(int group) const
{
return _aromaticGroups[group].heteroAtomsIndices.count;
}
// I/O
void DearomatizationsStorage::saveBinary(Output& output) const
{
output.writeByte(_dearomParams);
output.writePackedShort(static_cast<short>(_aromaticGroups.size()));
if (_dearomParams != Dearomatizer::PARAMS_SAVE_JUST_HETERATOMS)
{
for (int i = 0; i < _aromaticGroups.size(); i++)
{
int expectedOffset = 0;
if (i != 0)
expectedOffset = _aromaticGroups[i - 1].dearomBondsState.offset +
_aromaticGroups[i - 1].dearomBondsState.count * bitGetSize(_aromaticGroups[i - 1].aromBondsIndices.count);
if (i != 0 && _aromaticGroups[i].dearomBondsState.offset != expectedOffset)
throw Error("DearomatizationsStorage::saveBinary: invalid data order #1");
output.writePackedShort(static_cast<short>(_aromaticGroups[i].dearomBondsState.count));
}
output.writePackedShort(static_cast<short>(_dearomBondsStateArray.size()));
if (_dearomBondsStateArray.size() != 0)
output.write(_dearomBondsStateArray.ptr(), _dearomBondsStateArray.size() * sizeof(byte));
}
else
{
for (int i = 0; i < _aromaticGroups.size(); i++)
{
int expectedOffset = 0;
if (i != 0)
expectedOffset = _aromaticGroups[i - 1].heteroAtomsState.offset +
_aromaticGroups[i - 1].heteroAtomsState.count * bitGetSize(_aromaticGroups[i - 1].heteroAtomsIndices.count);
if (i != 0 && _aromaticGroups[i].heteroAtomsState.offset != expectedOffset)
throw Error("DearomatizationsStorage::saveBinary: invalid data order #2");
output.writePackedShort(static_cast<short>(_aromaticGroups[i].heteroAtomsState.count));
}
output.writePackedShort(static_cast<short>(_heteroAtomsStateArray.size()));
if (_heteroAtomsStateArray.size() != 0)
output.write(_heteroAtomsStateArray.ptr(), _heteroAtomsStateArray.size() * sizeof(byte));
}
}
void DearomatizationsStorage::loadBinary(Scanner& scanner)
{
clear();
_dearomParams = scanner.readChar();
short groupsCount = scanner.readPackedShort();
_aromaticGroups.resize(groupsCount);
_aromaticGroups.zerofill();
if (_dearomParams != Dearomatizer::PARAMS_SAVE_JUST_HETERATOMS)
{
for (int i = 0; i < groupsCount; i++)
{
short count = scanner.readPackedShort();
if (i != 0)
_aromaticGroups[i].dearomBondsState.offset = _aromaticGroups[i - 1].dearomBondsState.offset + count;
_aromaticGroups[i].dearomBondsState.count = count;
}
short bondsStateSize = scanner.readPackedShort();
_dearomBondsStateArray.resize(bondsStateSize);
if (bondsStateSize != 0)
scanner.read(bondsStateSize, _dearomBondsStateArray.ptr());
}
else
{
for (int i = 0; i < groupsCount; i++)
{
short count = scanner.readPackedShort();
if (i != 0)
_aromaticGroups[i].heteroAtomsState.offset = _aromaticGroups[i - 1].heteroAtomsState.offset + count;
_aromaticGroups[i].heteroAtomsState.count = count;
}
short heteroAtomsStateSize = scanner.readPackedShort();
_heteroAtomsStateArray.resize(heteroAtomsStateSize);
if (heteroAtomsStateSize)
scanner.read(heteroAtomsStateSize, _heteroAtomsStateArray.ptr());
}
}
IMPL_ERROR2(DearomatizationsStorage, DearomatizationException, "Dearomatization storage");
DearomatizationsStorage::DearomatizationsStorage(void)
{
_dearomParams = Dearomatizer::PARAMS_NO_DEAROMATIZATIONS;
}
//
// DearomatizationsGroups
//
IMPL_ERROR2(DearomatizationsGroups, DearomatizationException, "Dearomatization groups");
CP_DEF(DearomatizationsGroups);
DearomatizationsGroups::DearomatizationsGroups(BaseMolecule& molecule, bool skip_superatoms)
: _molecule(molecule), CP_INIT, TL_CP_GET(_vertexAromaticGroupIndex), TL_CP_GET(_vertexIsAcceptDoubleEdge), TL_CP_GET(_vertexIsAcceptSingleEdge),
TL_CP_GET(_vertexProcessed), TL_CP_GET(_groupVertices), TL_CP_GET(_groupEdges), TL_CP_GET(_groupHeteroAtoms), TL_CP_GET(_groupData)
{
_isQueryMolecule = _molecule.isQueryMolecule();
// collect superatoms
if (skip_superatoms)
for (int i = molecule.sgroups.begin(); i != molecule.sgroups.end(); i = molecule.sgroups.next(i))
{
SGroup& sgroup = molecule.sgroups.getSGroup(i);
if (sgroup.sgroup_type == SGroup::SG_TYPE_SUP)
{
for (int j = 0; j < sgroup.atoms.size(); j++)
_inside_superatoms.find_or_insert(sgroup.atoms[j]);
}
}
}
void DearomatizationsGroups::getGroupData(int group, int flags, DearomatizationsGroups::GROUP_DATA* data)
{
data->bonds.clear();
data->bondsInvMapping.resize(_molecule.edgeEnd());
data->heteroAtoms.clear();
data->vertices.clear();
if (flags & GET_VERTICES_FILTER)
{
data->verticesFilter.resize(_molecule.vertexEnd());
data->verticesFilter.zerofill();
}
for (int v_idx = _molecule.vertexBegin(); v_idx < _molecule.vertexEnd(); v_idx = _molecule.vertexNext(v_idx))
{
if (_vertexAromaticGroupIndex[v_idx] != group)
continue;
data->vertices.push(v_idx);
if (flags & GET_VERTICES_FILTER)
data->verticesFilter[v_idx] = 1;
if (flags & GET_HETERATOMS_INDICES)
{
// Check if atom have lone pair or vacant orbital
int lonepairs;
int label = _molecule.getAtomNumber(v_idx);
int charge = _molecule.getAtomCharge(v_idx);
int radical = _molecule.getAtomRadical_NoThrow(v_idx, -1);
// Treat unset charge and radical as zero;
// We have checked before in detectAromaticGroups()
if (charge == CHARGE_UNKNOWN)
charge = 0;
if (radical == -1)
radical = 0;
if (label == -1)
continue;
int max_conn = Element::getMaximumConnectivity(label, charge, radical, false);
int atom_group = Element::group(_molecule.getAtomNumber(v_idx));
int vac = _molecule.getVacantPiOrbitals(atom_group, charge, radical, max_conn, &lonepairs);
if (_vertexIsAcceptDoubleEdge[v_idx] && _vertexIsAcceptSingleEdge[v_idx] && (vac > 0 || lonepairs > 0))
data->heteroAtoms.push(v_idx);
}
}
memset(data->bondsInvMapping.ptr(), -1, sizeof(int) * data->bondsInvMapping.size());
for (int e_idx = _molecule.edgeBegin(); e_idx < _molecule.edgeEnd(); e_idx = _molecule.edgeNext(e_idx))
{
const Edge& edge = _molecule.getEdge(e_idx);
int bond_order = _molecule.getBondOrder(e_idx);
if (bond_order < 0 && _isQueryMolecule && _molecule.asQueryMolecule().possibleAromaticBond(e_idx))
bond_order = BOND_AROMATIC;
if (bond_order == BOND_AROMATIC && _vertexAromaticGroupIndex[edge.beg] == group)
{
data->bonds.push(e_idx);
data->bondsInvMapping[e_idx] = data->bonds.size() - 1;
}
}
}
// Construct bondsInvMapping, vertices and heteroAtomsInvMapping
void DearomatizationsGroups::getGroupDataFromStorage(DearomatizationsStorage& storage, int group, GROUP_DATA* data)
{
data->bondsInvMapping.resize(_molecule.edgeEnd());
data->vertices.clear();
data->heteroAtomsInvMapping.resize(_molecule.vertexEnd());
_vertexProcessed.resize(_molecule.vertexEnd());
_vertexProcessed.zerofill();
memset(data->bondsInvMapping.ptr(), -1, sizeof(int) * data->bondsInvMapping.size());
memset(data->heteroAtomsInvMapping.ptr(), -1, sizeof(int) * data->heteroAtomsInvMapping.size());
int bondsCount = storage.getGroupBondsCount(group);
const int* bonds = storage.getGroupBonds(group);
for (int i = 0; i < bondsCount; i++)
{
int e_idx = bonds[i];
data->bondsInvMapping[e_idx] = i;
const Edge& edge = _molecule.getEdge(e_idx);
if (!_vertexProcessed[edge.beg])
{
data->vertices.push(edge.beg);
_vertexProcessed[edge.beg] = 1;
}
if (!_vertexProcessed[edge.end])
{
data->vertices.push(edge.end);
_vertexProcessed[edge.end] = 1;
}
}
int heteroAtomsCount = storage.getGroupHeteroAtomsCount(group);
const int* heteroAtoms = storage.getGroupHeteroAtoms(group);
for (int i = 0; i < heteroAtomsCount; i++)
{
int h_idx = heteroAtoms[i];
data->heteroAtomsInvMapping[h_idx] = i;
}
}
int DearomatizationsGroups::detectAromaticGroups(const int* atom_external_conn)
{
_vertexAromaticGroupIndex.resize(_molecule.vertexEnd());
_vertexIsAcceptDoubleEdge.resize(_molecule.vertexEnd());
_vertexIsAcceptSingleEdge.resize(_molecule.vertexEnd());
memset(_vertexAromaticGroupIndex.ptr(), -1, _vertexAromaticGroupIndex.size() * sizeof(int));
int currentAromaticGroup = 0;
QueryMolecule* qmol = nullptr;
if (_isQueryMolecule)
qmol = &_molecule.asQueryMolecule();
for (int v_idx = _molecule.vertexBegin(); v_idx < _molecule.vertexEnd(); v_idx = _molecule.vertexNext(v_idx))
{
if (_inside_superatoms.find(v_idx))
continue;
if (_vertexAromaticGroupIndex[v_idx] != -1)
continue;
if ((_molecule.getAtomAromaticity(v_idx) == ATOM_ALIPHATIC) || _molecule.isPseudoAtom(v_idx) || _molecule.isTemplateAtom(v_idx))
continue;
if (_molecule.getAtomNumber(v_idx) == -1)
continue;
if (qmol != 0 && qmol->getAtom(v_idx).hasConstraint(QueryMolecule::ATOM_CHARGE) && qmol->getAtomCharge(v_idx) == CHARGE_UNKNOWN)
continue;
if (qmol != 0 && qmol->getAtom(v_idx).hasConstraint(QueryMolecule::ATOM_RADICAL) && qmol->getAtomCharge(v_idx) == -1)
continue;
_vertexAromaticGroupIndex[v_idx] = currentAromaticGroup++;
_detectAromaticGroups(v_idx, atom_external_conn);
}
// Add atoms that are connected to the aromaic group with double bonds
// like in O=C1NC=CC=C1
for (int e_idx = _molecule.edgeBegin(); e_idx < _molecule.edgeEnd(); e_idx = _molecule.edgeNext(e_idx))
{
const Edge& e = _molecule.getEdge(e_idx);
int& g1 = _vertexAromaticGroupIndex[e.beg];
int& g2 = _vertexAromaticGroupIndex[e.end];
if (g1 == g2)
continue;
if (_molecule.getBondOrder(e_idx) == BOND_DOUBLE)
{
int dangling_v;
if (g1 != -1)
{
g2 = g1;
dangling_v = e.end;
}
else
{
g1 = g2;
dangling_v = e.beg;
}
// Handle tricky case with 5-valence Nitrogen: CC1=CC=CC=[N]1=C
_vertexIsAcceptDoubleEdge[dangling_v] = false;
_vertexIsAcceptSingleEdge[dangling_v] = true;
}
}
_aromaticGroups = currentAromaticGroup;
return _aromaticGroups;
}
// Construct group structure in DearomatizationsStorage
void DearomatizationsGroups::constructGroups(DearomatizationsStorage& storage, bool needHeteroAtoms)
{
if (storage.getGroupsCount() == 0 && _aromaticGroups != 0)
storage.setGroupsCount(_aromaticGroups);
storage.clearIndices();
for (int group = 0; group < _aromaticGroups; group++)
{
int flags = 0;
if (needHeteroAtoms)
flags = GET_HETERATOMS_INDICES;
getGroupData(group, flags, &_groupData);
storage.setGroup(group, _groupData.bonds.size(), _groupData.bonds.ptr(), _groupData.heteroAtoms.size(), _groupData.heteroAtoms.ptr());
}
}
int DearomatizationsGroups::_getFixedConnectivitySpecific(int elem, int charge, int min_conn, int n_arom)
{
if (elem == ELEM_Se && charge == 0)
{
if (n_arom == 2) // two aromatic bonds
{
if (min_conn == 2) // no external bonds
return 2; // common case
if (min_conn == 3) // one external bond
return 4; // CID 10262587
if (min_conn == 4)
// CID 21204858, two single external bonds
// CID 14984497, one double aromatic bond
return 4;
}
}
else if (elem == ELEM_Se && charge == 1)
{
if (n_arom == 2) // two aromatic bonds
{
if (min_conn == 2) // no external bonds
return 3; // CID 10872228
if (min_conn == 3) // one external bond
return 3; // CID 11115581
}
}
else if (elem == ELEM_As && charge == 0)
{
if (n_arom == 2) // two aromatic bonds
{
if (min_conn == 2) // no external bonds
return 3; // CID 136132
if (min_conn == 3) // one external bond
return 3; // CID 237687
// no other cases known from PubChem
}
}
else if (elem == ELEM_S && charge == 0)
{
if (n_arom == 2)
{
if (min_conn == 3)
return 4; // CS1=[As]C=N[AsH]1
if (min_conn == 4)
return 4; // O=S1N=CC=N1
}
}
else if (elem == ELEM_N && charge == 0)
{
if (n_arom == 2 && min_conn == 4)
return 5; // 5-valence Nitrogen: CC(c1cc[n](=O)cc1)=O
}
return -1;
}
void DearomatizationsGroups::_detectAromaticGroups(int v_idx, const int* atom_external_conn)
{
int non_aromatic_conn = 0;
if (atom_external_conn != nullptr)
non_aromatic_conn = atom_external_conn[v_idx];
const Vertex& vertex = _molecule.getVertex(v_idx);
int n_arom = 0;
for (int i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
int e_idx = vertex.neiEdge(i);
int bond_order = _molecule.getBondOrder(e_idx);
if (bond_order == -1)
if (!_isQueryMolecule)
// Ignore such bonds.
// It may be zero bonds from TautomerSuperStructure
continue;
else if (_molecule.asQueryMolecule().possibleAromaticBond(e_idx))
bond_order = BOND_AROMATIC;
if (bond_order != BOND_AROMATIC)
{
non_aromatic_conn += bond_order;
continue;
}
non_aromatic_conn++;
n_arom++;
int vn_idx = vertex.neiVertex(i);
if (_vertexAromaticGroupIndex[vn_idx] != -1)
continue;
_vertexAromaticGroupIndex[vn_idx] = _vertexAromaticGroupIndex[v_idx];
_detectAromaticGroups(vn_idx, atom_external_conn);
}
bool impl_h_fixed = false;
if (_isQueryMolecule)
{
QueryMolecule& qm = _molecule.asQueryMolecule();
if (atom_external_conn == nullptr)
{
int impl_h = -1;
if (qm.getAtom(v_idx).sureValue(QueryMolecule::ATOM_IMPLICIT_H, impl_h))
{
non_aromatic_conn += impl_h;
impl_h_fixed = true;
}
}
}
else
{
Molecule& m = _molecule.asMolecule();
// Check if number of hydrogens are fixed
if (atom_external_conn == nullptr)
{
int impl_h = m.getImplicitH_NoThrow(v_idx, -1);
if (impl_h != -1)
{
non_aromatic_conn += impl_h;
impl_h_fixed = true;
}
}
}
int label = _molecule.getAtomNumber(v_idx);
int charge = _molecule.getAtomCharge(v_idx);
if (_isQueryMolecule && charge == CHARGE_UNKNOWN)
{
charge = 0;
}
// If radical is undefined then treat it as there are not radical
// Because if there were a radical it should have been explicitly marked
int radical = _molecule.getAtomRadical_NoThrow(v_idx, 0);
if (_isQueryMolecule && radical < 0)
{
radical = 0;
}
int max_connectivity = -1;
if (_isQueryMolecule)
{
if (_molecule.asQueryMolecule().possibleNitrogenV5(v_idx))
max_connectivity = 5;
}
else
{
Molecule& m = _molecule.asMolecule();
if (atom_external_conn == 0)
{
if (m.isNitrogenV5(v_idx))
max_connectivity = 5;
}
else
{
if (m.isNitrogenV5ForConnectivity(v_idx, non_aromatic_conn))
max_connectivity = non_aromatic_conn;
if (m.isNitrogenV5ForConnectivity(v_idx, non_aromatic_conn + 1))
max_connectivity = non_aromatic_conn + 1;
}
}
// Explicit values for specific configurations
if (max_connectivity == -1)
{
max_connectivity = _getFixedConnectivitySpecific(label, charge, non_aromatic_conn, n_arom);
if (max_connectivity != -1)
impl_h_fixed = true;
}
// Apply general purpose method
if (max_connectivity == -1)
max_connectivity = Element::getMaximumConnectivity(label, charge, radical, false);
int atom_aromatic_connectivity = max_connectivity - non_aromatic_conn;
if (atom_aromatic_connectivity < 0) // recalc with use_d_orbital=true
max_connectivity = Element::getMaximumConnectivity(label, charge, radical, true);
atom_aromatic_connectivity = max_connectivity - non_aromatic_conn;
if (atom_aromatic_connectivity < 0)
{
_vertexIsAcceptSingleEdge[v_idx] = false;
_vertexIsAcceptDoubleEdge[v_idx] = false;
}
else
{
_vertexIsAcceptSingleEdge[v_idx] = true;
if (atom_aromatic_connectivity > 0)
{
_vertexIsAcceptDoubleEdge[v_idx] = true;
// If number of implicit hydrogens are fixed and double bond is possible then
// double bond must exist
if (impl_h_fixed)
_vertexIsAcceptSingleEdge[v_idx] = false;
}
else
_vertexIsAcceptDoubleEdge[v_idx] = false;
}
}
bool* DearomatizationsGroups::getAcceptDoubleBonds(void)
{
return _vertexIsAcceptDoubleEdge.ptr();
}
bool DearomatizationsGroups::isAcceptDoubleBond(int atom)
{
return _vertexIsAcceptDoubleEdge[atom];
}
//
// DearomatizationMatcher
//
IMPL_ERROR2(DearomatizationMatcher, DearomatizationException, "Dearomatization matcher");
CP_DEF(DearomatizationMatcher);
DearomatizationMatcher::DearomatizationMatcher(DearomatizationsStorage& dearomatizations, BaseMolecule& molecule, const int* atom_external_conn,
bool skip_superatoms)
: _molecule(molecule), _dearomatizations(dearomatizations), _graphMatchingFixedEdges(molecule), _aromaticGroups(molecule, skip_superatoms), CP_INIT,
TL_CP_GET(_matchedEdges), TL_CP_GET(_matchedEdgesState), TL_CP_GET(_groupExInfo), TL_CP_GET(_verticesInGroup), TL_CP_GET(_verticesAdded),
TL_CP_GET(_edges2GroupMapping), TL_CP_GET(_edges2IndexInGroupMapping), TL_CP_GET(_correctEdgesArray), TL_CP_GET(_verticesFixCount),
TL_CP_GET(_aromaticGroupsData)
{
_needPrepare = true;
_aromaticGroups.detectAromaticGroups(atom_external_conn);
}
bool DearomatizationMatcher::isAbleToFixBond(int edge_idx, int type)
{
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_NO_DEAROMATIZATIONS)
return false;
_prepare();
int group = _edges2GroupMapping[edge_idx];
if (group == -1)
return false;
if (type == BOND_TRIPLE)
return false; // Triple bonds aren't supported
_prepareGroup(group);
if (_dearomatizations.getGroupDearomatizationsCount(group) == 0)
return false;
int offset = _groupExInfo[group].offsetInEdgesState;
byte* groupFixedEdgesPtr = _matchedEdges.ptr() + offset;
int indexInGroup = _edges2IndexInGroupMapping[edge_idx];
byte* groupFixedEdgesStatePtr = _matchedEdgesState.ptr() + offset;
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_SAVE_ALL_DEAROMATIZATIONS)
{
bitSetBit(groupFixedEdgesPtr, indexInGroup, 1);
bitSetBit(groupFixedEdgesStatePtr, indexInGroup, type - 1);
// Try to find dearomatization with the same edges in all dearomatizations
int count = _dearomatizations.getGroupDearomatizationsCount(group);
int activeEdgeState = _groupExInfo[group].activeEdgeState;
int i;
for (i = 0; i < count; i++)
{
const byte* dearomState = _dearomatizations.getGroupDearomatization(group, (i + activeEdgeState) % count);
int nbits = _dearomatizations.getGroupBondsCount(group);
if (bitTestEqualityByMask(dearomState, groupFixedEdgesStatePtr, groupFixedEdgesPtr, nbits))
{
_groupExInfo[group].activeEdgeState = i;
break; // Dearomatization was found
}
}
if (i != count)
{
_lastAcceptedEdge = edge_idx;
_lastAcceptedEdgeType = type;
}
bitSetBit(groupFixedEdgesPtr, indexInGroup, 0);
if (i != count)
return true;
}
else
{
// Try to use active dearomatizations
byte* activeDearom = _dearomatizations.getGroupDearomatization(group, _groupExInfo[group].activeEdgeState);
if (bitGetBit(activeDearom, indexInGroup) == type - 1)
{
bitSetBit(groupFixedEdgesStatePtr, indexInGroup, type - 1);
_lastAcceptedEdge = edge_idx;
_lastAcceptedEdgeType = type;
return true;
}
// Try to modify current dearomatization
_graphMatchingFixedEdges.setEdgesMappingPtr(_edges2IndexInGroupMapping.ptr());
_graphMatchingFixedEdges.setMatchingEdgesPtr(activeDearom);
_graphMatchingFixedEdges.setExtraInfo(groupFixedEdgesPtr);
if (_fixBondInMatching(group, indexInGroup, type))
{
bitSetBit(groupFixedEdgesStatePtr, indexInGroup, type - 1);
_lastAcceptedEdge = edge_idx;
_lastAcceptedEdgeType = type;
return true;
}
// Try to modify other dearomatizations
bitSetBit(groupFixedEdgesPtr, indexInGroup, 1);
bitSetBit(groupFixedEdgesStatePtr, indexInGroup, type - 1);
int count = _dearomatizations.getGroupDearomatizationsCount(group);
for (int i = 0; i < count - 1; i++)
{
int dearom_idx = (i + 1 + _groupExInfo[group].activeEdgeState) % count;
// Get difference between current state and dearomatization state in group
if (_tryToChangeActiveIndex(dearom_idx, group, groupFixedEdgesPtr, groupFixedEdgesStatePtr))
{
bitSetBit(groupFixedEdgesPtr, indexInGroup, 0);
_groupExInfo[group].activeEdgeState = dearom_idx;
_lastAcceptedEdge = edge_idx;
_lastAcceptedEdgeType = type;
return true;
}
}
bitSetBit(groupFixedEdgesPtr, indexInGroup, 0);
return false;
}
return false;
}
bool DearomatizationMatcher::fixBond(int edge_idx, int type)
{
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_NO_DEAROMATIZATIONS)
return false;
_prepare();
int group = _edges2GroupMapping[edge_idx];
if (group == -1)
return false;
if (_lastAcceptedEdge != edge_idx || _lastAcceptedEdgeType != type)
{
if (!isAbleToFixBond(edge_idx, type))
return false;
if (_lastAcceptedEdge != edge_idx || _lastAcceptedEdgeType != type)
throw Error("DearomatizationMatcher::fixBond: internal error");
}
int offset = _groupExInfo[group].offsetInEdgesState;
byte* groupFixedEdgesPtr = _matchedEdges.ptr() + offset;
byte* groupFixedEdgesStatePtr = _matchedEdgesState.ptr() + offset;
int indexInGroup = _edges2IndexInGroupMapping[edge_idx];
bitSetBit(groupFixedEdgesPtr, indexInGroup, 1);
if (bitGetBit(groupFixedEdgesStatePtr, indexInGroup) != type - 1)
throw Error("DearomatizationMatcher::fixBond: internal error #2");
const Edge& edge = _molecule.getEdge(edge_idx);
_verticesFixCount[edge.beg]++;
_verticesFixCount[edge.end]++;
_lastAcceptedEdge = -1;
return true;
}
void DearomatizationMatcher::unfixBond(int edge_idx)
{
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_NO_DEAROMATIZATIONS)
return;
_prepare();
int group = _edges2GroupMapping[edge_idx];
if (group == -1)
return;
byte* groupFixedEdgesPtr = _matchedEdges.ptr() + _groupExInfo[group].offsetInEdgesState;
bitSetBit(groupFixedEdgesPtr, _edges2IndexInGroupMapping[edge_idx], 0);
const Edge& edge = _molecule.getEdge(edge_idx);
_verticesFixCount[edge.beg]--;
_verticesFixCount[edge.end]--;
}
void DearomatizationMatcher::unfixBondByAtom(int atom_idx)
{
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_NO_DEAROMATIZATIONS)
return;
_prepare();
if (_verticesFixCount[atom_idx] == 0)
return;
const Vertex& vertex = _molecule.getVertex(atom_idx);
for (int i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
unfixBond(vertex.neiEdge(i));
}
void DearomatizationMatcher::_prepare(void)
{
if (!_needPrepare)
return;
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_SAVE_JUST_HETERATOMS)
{
_dearomatizations.clearBondsState();
_aromaticGroups.constructGroups(_dearomatizations, true);
}
else
_aromaticGroups.constructGroups(_dearomatizations, false);
int offset = 0;
_groupExInfo.resize(_dearomatizations.getGroupsCount());
_edges2IndexInGroupMapping.resize(_molecule.edgeEnd());
_edges2GroupMapping.resize(_molecule.edgeEnd());
memset(_edges2IndexInGroupMapping.ptr(), -1, sizeof(int) * _edges2IndexInGroupMapping.size());
memset(_edges2GroupMapping.ptr(), -1, sizeof(int) * _edges2GroupMapping.size());
_verticesFixCount.resize(_molecule.vertexEnd());
_verticesFixCount.zerofill();
int maxGroupDearomatizations = 0;
for (int group = 0; group < _dearomatizations.getGroupsCount(); group++)
{
_groupExInfo[group].offsetInEdgesState = offset;
_groupExInfo[group].activeEdgeState = 0;
if (_dearomatizations.getDearomatizationParams() == Dearomatizer::PARAMS_SAVE_JUST_HETERATOMS)
_groupExInfo[group].needPrepare = true;
else
_groupExInfo[group].needPrepare = false;
maxGroupDearomatizations = std::max(maxGroupDearomatizations, _dearomatizations.getGroupDearomatizationsCount(group));
maxGroupDearomatizations = std::max(maxGroupDearomatizations, _dearomatizations.getGroupHeterAtomsStateCount(group));
int edgesInGroup = _dearomatizations.getGroupBondsCount(group);
const int* edges = _dearomatizations.getGroupBonds(group);
for (int i = 0; i < edgesInGroup; i++)
{
int edge_idx = edges[i];
_edges2GroupMapping[edge_idx] = group;
_edges2IndexInGroupMapping[edge_idx] = i;
}
offset += bitGetSize(edgesInGroup);
}
_matchedEdges.resize(offset);
_matchedEdges.zerofill();
_matchedEdgesState.resize(_matchedEdges.size());
_correctEdgesArray.resize(_matchedEdges.size());
if (_dearomatizations.getDearomatizationParams() != Dearomatizer::PARAMS_SAVE_ALL_DEAROMATIZATIONS)
{
_verticesInGroup.reserve(_molecule.vertexEnd());
_verticesAdded.resize(_molecule.vertexEnd());
_verticesAdded.zeroFill();
_generateUsedVertices();
_graphMatchingFixedEdges.setAllVerticesInMatching();
}
_lastAcceptedEdge = -1;
_lastAcceptedEdgeType = -1;
_needPrepare = false;
}
// Generate used vertices per each group
void DearomatizationMatcher::_generateUsedVertices()
{
for (int group = 0; group < _dearomatizations.getGroupsCount(); group++)
{
_groupExInfo[group].offsetInVertices = _verticesInGroup.size();
const int* groupBonds = _dearomatizations.getGroupBonds(group);
int count = _dearomatizations.getGroupBondsCount(group);
for (int i = 0; i < count; i++)
{
const Edge& edge = _molecule.getEdge(groupBonds[i]);
if (!_verticesAdded.get(edge.beg))
{
_verticesInGroup.push(edge.beg);
_verticesAdded.set(edge.beg);
}
if (!_verticesAdded.get(edge.end))
{
_verticesInGroup.push(edge.end);
_verticesAdded.set(edge.end);
}
}
_groupExInfo[group].verticesUsed = _verticesInGroup.size() - _groupExInfo[group].offsetInVertices;
}
}
// Try to modify dearomatizations to have the same fixed bonds
bool DearomatizationMatcher::_tryToChangeActiveIndex(int dearom_idx, int group, byte* groupFixedEdgesPtr, byte* groupFixedEdgesStatePtr)
{
int bondsCount = _dearomatizations.getGroupBondsCount(group);
byte* dearomState = _dearomatizations.getGroupDearomatization(group, dearom_idx);
bitGetAandBxorNotC(groupFixedEdgesPtr, groupFixedEdgesStatePtr, dearomState, _correctEdgesArray.ptr(), bondsCount);
_graphMatchingFixedEdges.setExtraInfo(_correctEdgesArray.ptr());
_graphMatchingFixedEdges.setMatchingEdgesPtr(dearomState);
int bytesCount = bitGetSize(bondsCount);
for (int i = 0; i < bytesCount; i++)
{
byte dif = groupFixedEdgesPtr[i] & (groupFixedEdgesStatePtr[i] ^ dearomState[i]);
while (dif != 0)
{
int indexInGroup = bitGetOneLOIndex(dif) + i * 8;
if (indexInGroup > bondsCount)
return true;
if (!_fixBondInMatching(group, indexInGroup, bitGetBit(groupFixedEdgesStatePtr, indexInGroup) + 1))
return false;
// Update correct edges
_correctEdgesArray[i] = groupFixedEdgesPtr[i] & (groupFixedEdgesStatePtr[i] ^ ~dearomState[i]);
dif = groupFixedEdgesPtr[i] & (groupFixedEdgesStatePtr[i] ^ dearomState[i]);
}
}
return true;
}
bool DearomatizationMatcher::_fixBondInMatching(int group, int indexInGroup, int type)
{
const int* aromEdges = _dearomatizations.getGroupBonds(group);
const Edge& edge = _molecule.getEdge(aromEdges[indexInGroup]);
bool found = _graphMatchingFixedEdges.findAlternatingPath(edge.beg, edge.end, type != BOND_SINGLE, type != BOND_SINGLE);
if (found)
{
if (type == BOND_SINGLE)
{
_graphMatchingFixedEdges.setEdgeMatching(aromEdges[indexInGroup], false);
_graphMatchingFixedEdges.processPath();
}
else
{
_graphMatchingFixedEdges.processPath();
_graphMatchingFixedEdges.setEdgeMatching(aromEdges[indexInGroup], true);
}
return true;
}
return false;
}
void DearomatizationMatcher::_prepareGroup(int group)
{
if (!_groupExInfo[group].needPrepare)
return;
_groupExInfo[group].needPrepare = false;
if (_dearomatizations.getGroupHeteroAtomsCount(group) != 0 && _dearomatizations.getGroupHeterAtomsStateCount(group) == 0)
return;
// Create mapping from local hetero-atoms indices to atom indices in molecule
_aromaticGroups.getGroupDataFromStorage(_dearomatizations, group, &_aromaticGroupsData);
GraphMatchingVerticesFixed graphMatchingFixedVertices(_molecule);
graphMatchingFixedVertices.setEdgesMappingPtr(_aromaticGroupsData.bondsInvMapping.ptr());
graphMatchingFixedVertices.setVerticesSetPtr(_aromaticGroupsData.vertices.ptr(), _aromaticGroupsData.vertices.size());
graphMatchingFixedVertices.setVerticesMapping(_aromaticGroupsData.heteroAtomsInvMapping.ptr());
graphMatchingFixedVertices.setVerticesAccept(_aromaticGroups.getAcceptDoubleBonds());
// Generate one dearomatization for each hetero-atoms configuration
int count = _dearomatizations.getGroupHeterAtomsStateCount(group);
int index = 0;
do
{
if (count != 0)
{
const byte* heteroAtomsState = _dearomatizations.getGroupHeterAtomsState(group, index++);
graphMatchingFixedVertices.setVerticesState(heteroAtomsState);
}
if (!graphMatchingFixedVertices.findMatching())
throw Error("DearomatizationMatcher::_prepareGroup: internal error");
_dearomatizations.addGroupDearomatization(group, graphMatchingFixedVertices.getEdgesState());
graphMatchingFixedVertices.reset();
} while (index < count);
}
//
// DearomatizationMatcher::GraphMatchingEdgeFixed
//
void DearomatizationMatcher::GraphMatchingEdgeFixed::setExtraInfo(byte* edgesEdges)
{
_edgesState = edgesEdges;
}
bool DearomatizationMatcher::GraphMatchingEdgeFixed::checkEdge(int e_idx)
{
return !bitGetBit(_edgesState, _edgesMapping[e_idx]);
}
DearomatizationMatcher::GraphMatchingEdgeFixed::GraphMatchingEdgeFixed(BaseMolecule& molecule)
: GraphPerfectMatching(molecule, USE_EXTERNAL_EDGES_PTR | USE_EDGES_MAPPING | USE_VERTICES_SET)
{
_edgesState = NULL;
}
//
// DearomatizationMatcher::GraphMatchingVerticesFixed
//
bool DearomatizationMatcher::GraphMatchingVerticesFixed::checkVertex(int v_idx)
{
if (_verticesMapping[v_idx] != -1)
return bitGetBit(_verticesState, _verticesMapping[v_idx]) == 1;
return _verticesAcceptDoubleBond[v_idx];
}
void DearomatizationMatcher::GraphMatchingVerticesFixed::setVerticesState(const byte* verticesState)
{
_verticesState = verticesState;
}
void DearomatizationMatcher::GraphMatchingVerticesFixed::setVerticesMapping(int* verticesMapping)
{
_verticesMapping = verticesMapping;
}
void DearomatizationMatcher::GraphMatchingVerticesFixed::setVerticesAccept(bool* verticesAcceptDoubleBond)
{
_verticesAcceptDoubleBond = verticesAcceptDoubleBond;
}
DearomatizationMatcher::GraphMatchingVerticesFixed::GraphMatchingVerticesFixed(BaseMolecule& molecule)
: GraphPerfectMatching(molecule, USE_EDGES_MAPPING | USE_VERTICES_SET)
{
_verticesState = NULL;
_verticesMapping = NULL;
_verticesAcceptDoubleBond = NULL;
}
//
// MoleculeDearomatizer
//
CP_DEF(MoleculeDearomatizer);
MoleculeDearomatizer::MoleculeDearomatizer(BaseMolecule& mol, DearomatizationsStorage& dearom)
: _dearomatizations(dearom), _mol(mol), CP_INIT, TL_CP_GET(vertex_connectivity)
{
_isQueryMolecule = _mol.isQueryMolecule();
}
void MoleculeDearomatizer::dearomatizeGroup(int group, int dearomatization_index)
{
byte* bondsState = _dearomatizations.getGroupDearomatization(group, dearomatization_index);
const int* bondsMap = _dearomatizations.getGroupBonds(group);
int bondsCount = _dearomatizations.getGroupBondsCount(group);
for (int i = 0; i < bondsCount; i++)
{
if (bitGetBit(bondsState, i))
if (_isQueryMolecule)
dearomatizeQueryBond(_mol.asQueryMolecule(), bondsMap[i], BOND_DOUBLE);
else
_mol.asMolecule().setBondOrder(bondsMap[i], BOND_DOUBLE, true);
else if (_isQueryMolecule)
dearomatizeQueryBond(_mol.asQueryMolecule(), bondsMap[i], BOND_SINGLE);
else
_mol.asMolecule().setBondOrder(bondsMap[i], BOND_SINGLE, true);
}
}
int MoleculeDearomatizer::_getBestDearomatization(int group)
{
// Select group with more double bonds that means less hydrogens
// For example for c1cnn2nnnc2c1 one should select C1=CC2=NN=NN2N=C1
// instead of N1NN2NC=CC=C2N1
int groups = _dearomatizations.getGroupDearomatizationsCount(group);
int best_index = -1, best_count = -1;
for (int i = 0; i < groups; i++)
{
int cnt = _countDoubleBonds(group, i);
if (cnt > best_count)
{
best_count = cnt;
best_index = i;
}
}
return best_index;
}
int MoleculeDearomatizer::_countDoubleBonds(int group, int dearomatization_index)
{
byte* bondsState = _dearomatizations.getGroupDearomatization(group, dearomatization_index);
int bondsCount = _dearomatizations.getGroupBondsCount(group);
int count = 0;
for (int i = 0; i < bondsCount; i++)
if (bitGetBit(bondsState, i))
count++;
return count;
}
void MoleculeDearomatizer::restoreHydrogens(int group, int dearomatization_index)
{
byte* bondsState = _dearomatizations.getGroupDearomatization(group, dearomatization_index);
const int* bondsMap = _dearomatizations.getGroupBonds(group);
int bondsCount = _dearomatizations.getGroupBondsCount(group);
for (int i = 0; i < bondsCount; i++)
{
const Edge& edge = _mol.getEdge(bondsMap[i]);
int order = bitGetBit(bondsState, i) ? 2 : 1;
int v_indices[2] = {edge.beg, edge.end};
for (int j = 0; j < 2; j++)
{
int v = v_indices[j];
if (vertex_connectivity[j] == 0)
{
// Compute non-aromatic connectivity
const Vertex& vertex = _mol.getVertex(v);
for (int nei = vertex.neiBegin(); nei != vertex.neiEnd(); nei = vertex.neiNext(nei))
{
int nei_edge = vertex.neiEdge(nei);
int nei_order = _mol.getBondOrder(nei_edge);
if (nei_order != BOND_AROMATIC)
vertex_connectivity[v] += nei_order;
}
}
}
vertex_connectivity[edge.beg] += order;
vertex_connectivity[edge.end] += order;
}
}
bool MoleculeDearomatizer::dearomatizeMolecule(BaseMolecule& mol, const AromaticityOptions& options)
{
DearomatizationsStorage dst;
Dearomatizer dearomatizer(mol, 0, options);
dearomatizer.setDearomatizationParams(Dearomatizer::PARAMS_SAVE_ONE_DEAROMATIZATION);
dearomatizer.enumerateDearomatizations(dst);
MoleculeDearomatizer mol_dearom(mol, dst);
bool all_dearomatzied = true;
for (int i = 0; i < dst.getGroupsCount(); ++i)
{
int cnt = dst.getGroupDearomatizationsCount(i);
if (cnt == 0)
all_dearomatzied = false;
else if (cnt > 1 && options.unique_dearomatization)
throw NonUniqueDearomatizationException("Dearomatization is not unique");
else
mol_dearom.dearomatizeGroup(i, mol_dearom._getBestDearomatization(i));
}
// Dearomatize RGroups
int n_rgroups = mol.rgroups.getRGroupCount();
for (int i = 1; i <= n_rgroups; i++)
{
PtrPool<BaseMolecule>& frags = mol.rgroups.getRGroup(i).fragments;
for (int j = frags.begin(); j != frags.end(); j = frags.next(j))
{
Molecule& fragment = frags[j]->asMolecule();
dearomatizeMolecule(fragment, options);
}
}
return all_dearomatzied;
}
bool MoleculeDearomatizer::restoreHydrogens(BaseMolecule& mol, const AromaticityOptions& options)
{
bool found_invalid_aromatic_h = false;
bool _isQueryMolecule = mol.isQueryMolecule();
for (int i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
{
if (mol.isRSite(i) || mol.isPseudoAtom(i) || mol.isTemplateAtom(i))
continue;
if (_isQueryMolecule)
{
// TODO QDEAROM
}
else if (mol.asMolecule().getImplicitH_NoThrow(i, -1) == -1 && mol.getAtomAromaticity(i) == ATOM_AROMATIC)
found_invalid_aromatic_h = true;
}
if (!found_invalid_aromatic_h)
return false;
DearomatizationsStorage dst;
Dearomatizer dearomatizer(mol, 0, options);
dearomatizer.setDearomatizationParams(Dearomatizer::PARAMS_SAVE_ONE_DEAROMATIZATION);
dearomatizer.enumerateDearomatizations(dst);
MoleculeDearomatizer mol_dearom(mol, dst);
mol_dearom.vertex_connectivity.clear_resize(mol.vertexEnd());
mol_dearom.vertex_connectivity.zerofill();
bool all_dearomatzied = true;
for (int i = 0; i < dst.getGroupsCount(); ++i)
{
int cnt = dst.getGroupDearomatizationsCount(i);
if (cnt == 0)
all_dearomatzied = false;
else if (cnt > 1 && options.unique_dearomatization)
throw NonUniqueDearomatizationException("Dearomatization is not unique. Cannot restore hydrogens.");
else
mol_dearom.restoreHydrogens(i, mol_dearom._getBestDearomatization(i));
}
for (int i = mol.vertexBegin(); i != mol.vertexEnd(); i = mol.vertexNext(i))
{
int conn = mol_dearom.vertex_connectivity[i];
if (mol.isRSite(i) || mol.isPseudoAtom(i) || mol.isTemplateAtom(i))
continue;
if (!_isQueryMolecule && (mol.asMolecule().getImplicitH_NoThrow(i, -1) == -1 && conn > 0))
{
int h = mol.asMolecule().calcImplicitHForConnectivity(i, conn);
mol.asMolecule().setImplicitH(i, h);
}
}
return all_dearomatzied;
}
bool MoleculeDearomatizer::restoreHydrogens(BaseMolecule& mol, bool unambiguous_only)
{
AromaticityOptions options;
options.method = AromaticityOptions::GENERIC;
options.unique_dearomatization = unambiguous_only;
return MoleculeDearomatizer::restoreHydrogens(mol, options);
}