core/indigo-core/molecule/src/molecule.cpp (1,278 lines of code) (raw):
/****************************************************************************
* Copyright (C) from 2009 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#include "molecule/molecule.h"
#include "base_c/defs.h"
#include "base_cpp/output.h"
#include "molecule/elements.h"
#include "molecule/molecule_arom.h"
#include "molecule/molecule_dearom.h"
#include "molecule/molecule_standardize.h"
#include "molecule/monomer_commons.h"
#ifdef _MSC_VER
#pragma warning(push, 4)
#endif
using namespace indigo;
Molecule::Molecule()
{
_aromatized = false;
_ignore_bad_valence = false;
}
Molecule& Molecule::asMolecule()
{
return *this;
}
void Molecule::clear()
{
BaseMolecule::clear();
_pseudo_atom_values.clear();
_atoms.clear();
_bond_orders.clear();
_connectivity.clear();
_aromaticity.clear();
_implicit_h.clear();
_total_h.clear();
_valence.clear();
_radicals.clear();
_aromatized = false;
_ignore_bad_valence = false;
updateEditRevision();
}
void Molecule::_flipBond(int atom_parent, int atom_from, int atom_to)
{
int src_bond_idx = findEdgeIndex(atom_parent, atom_from);
int bond_order = getBondOrder(src_bond_idx);
addBond(atom_parent, atom_to, bond_order);
updateEditRevision();
}
void Molecule::_mergeWithSubmolecule(BaseMolecule& bmol, const Array<int>& vertices, const Array<int>* edges, const Array<int>& mapping, int /*skip_flags*/)
{
Molecule& mol = bmol.asMolecule();
_ignore_bad_valence = mol.getIgnoreBadValenceFlag();
int i;
// atoms and pseudo-atoms and connectivities and implicit H counters
for (i = 0; i < vertices.size(); i++)
{
int newidx = mapping[vertices[i]];
_atoms.expand(newidx + 1);
_atoms[newidx] = mol._atoms[vertices[i]];
if (mol.isPseudoAtom(vertices[i]))
setPseudoAtom(newidx, mol.getPseudoAtom(vertices[i]));
if (mol.isTemplateAtom(vertices[i]))
{
setTemplateAtom(newidx, mol.getTemplateAtom(vertices[i]));
setTemplateAtomClass(newidx, mol.getTemplateAtomClass(vertices[i]));
setTemplateAtomSeqid(newidx, mol.getTemplateAtomSeqid(vertices[i]));
setTemplateAtomTemplateIndex(newidx, mol.getTemplateAtomTemplateIndex(vertices[i]));
}
bool nei_mapped = (getVertex(newidx).degree() == mol.getVertex(vertices[i]).degree());
if (mol._connectivity.size() > vertices[i])
{
_connectivity.expandFill(newidx + 1, -1);
if (nei_mapped)
_connectivity[newidx] = mol._connectivity[vertices[i]];
}
if (mol._valence.size() > vertices[i])
{
_valence.expandFill(newidx + 1, -1);
if (nei_mapped)
_valence[newidx] = mol._valence[vertices[i]];
}
if (mol._implicit_h.size() > vertices[i])
{
_implicit_h.expandFill(newidx + 1, -1);
if (nei_mapped)
_implicit_h[newidx] = mol._implicit_h[vertices[i]];
}
if (mol._radicals.size() > vertices[i])
{
_radicals.expandFill(newidx + 1, -1);
if (nei_mapped)
_radicals[newidx] = mol._radicals[vertices[i]];
}
}
// bonds
if (edges != 0)
for (i = 0; i < edges->size(); i++)
{
const Edge& edge = mol.getEdge(edges->at(i));
int beg = mapping[edge.beg];
int end = mapping[edge.end];
if (beg == -1 || end == -1)
// must have been thrown before in mergeWithSubgraph()
throw Error("_mergeWithSubmolecule: internal");
int idx = findEdgeIndex(beg, end);
_bond_orders.expand(idx + 1);
_bond_orders[idx] = mol._bond_orders[edges->at(i)];
}
else
for (i = mol.edgeBegin(); i < mol.edgeEnd(); i = mol.edgeNext(i))
{
const Edge& edge = mol.getEdge(i);
int beg = mapping[edge.beg];
int end = mapping[edge.end];
if (beg == -1 || end == -1)
continue;
int idx = findEdgeIndex(beg, end);
_bond_orders.expand(idx + 1);
_bond_orders[idx] = mol._bond_orders[i];
}
_aromaticity.clear();
}
/*
void Molecule::setQueryBondAromatic (int idx)
{
setBondAromatic(idx);
_q_bonds->at(idx).type = 0;
_q_bonds->at(idx).can_be_aromatic = false;
}
void Molecule::setQueryBondFuzzyAromatic (int idx)
{
_q_bonds->at(idx).can_be_aromatic = true;
}
*/
void Molecule::_validateVertexConnectivity(int idx, bool validate)
{
if (validate)
{
getAtomConnectivity_noImplH(idx);
getImplicitH_NoThrow(idx, -1);
getAtomValence_NoThrow(idx, -1);
}
else
{
if (_connectivity.size() > idx)
_connectivity[idx] = -1;
if (_implicit_h.size() > idx)
{
_atoms[idx].explicit_impl_h = false;
_implicit_h[idx] = -1;
}
if (_total_h.size() > idx)
_total_h[idx] = -1;
if (_valence.size() > idx)
{
_atoms[idx].explicit_valence = false;
_valence[idx] = -1;
}
if (_radicals.size() > idx)
{
_radicals[idx] = -1;
}
}
updateEditRevision();
}
void Molecule::_invalidateVertexCache(int idx)
{
if (!isExplicitValenceSet(idx) && _valence.size() > idx)
_valence[idx] = -1;
if (!isImplicitHSet(idx) && _implicit_h.size() > idx)
_implicit_h[idx] = -1;
if (_total_h.size() > idx)
_total_h[idx] = -1;
}
void Molecule::setBondOrder(int idx, int order, bool keep_connectivity)
{
const Edge& edge = getEdge(idx);
// if (_atoms[edge.beg].explicit_valence || _atoms[edge.end].explicit_valence)
// throw Error("setBondOrder(): explicit valence on atom");
// if (_atoms[edge.beg].explicit_impl_h || _atoms[edge.end].explicit_impl_h)
// throw Error("setBondOrder(): explicit H count on atom");
if (keep_connectivity && _bond_orders[idx] != BOND_AROMATIC && order != BOND_AROMATIC)
throw Error("setBondOrder(): keep_connectivity must be used only with aromatic bonds");
// If connectivity should be kept then calculate connectivity and
// all dependent constants (valence, implicit hydrogens)
_validateVertexConnectivity(edge.beg, keep_connectivity);
_validateVertexConnectivity(edge.end, keep_connectivity);
if (_bond_orders[idx] == BOND_AROMATIC || order == BOND_AROMATIC)
_aromaticity.clear();
_bond_orders[idx] = order;
if (order != BOND_DOUBLE)
cis_trans.setParity(idx, 0);
_aromatized = false;
updateEditRevision();
}
void Molecule::setBondOrder_Silent(int idx, int order)
{
_bond_orders[idx] = order;
updateEditRevision();
}
void Molecule::setAtomCharge(int idx, int charge)
{
_atoms[idx].charge = charge;
if (_implicit_h.size() > idx)
_implicit_h[idx] = -1;
if (_total_h.size() > idx)
_total_h[idx] = -1;
if (_radicals.size() > idx)
_radicals[idx] = -1;
updateEditRevision();
}
void Molecule::setAtomCharge_Silent(int idx, int charge)
{
_atoms[idx].charge = charge;
updateEditRevision();
}
void Molecule::setAtomIsotope(int idx, int isotope)
{
_atoms[idx].isotope = isotope;
updateEditRevision();
}
void Molecule::setAtomRadical(int idx, int radical)
{
_radicals.expandFill(idx + 1, -1);
_radicals[idx] = radical;
_invalidateVertexCache(idx);
updateEditRevision();
}
void Molecule::setExplicitValence(int idx, int valence)
{
_valence.expandFill(idx + 1, -1);
_valence[idx] = valence;
_atoms[idx].explicit_valence = true;
_invalidateVertexCache(idx);
updateEditRevision();
}
void Molecule::resetExplicitValence(int idx)
{
if (_valence.size() > idx)
_valence[idx] = -1;
_atoms[idx].explicit_valence = false;
_invalidateVertexCache(idx);
updateEditRevision();
}
bool Molecule::isExplicitValenceSet(int idx)
{
return _atoms[idx].explicit_valence;
}
void Molecule::setValence(int idx, int valence)
{
_valence.expandFill(idx + 1, -1);
_valence[idx] = valence;
updateEditRevision();
}
void Molecule::setImplicitH(int idx, int impl_h)
{
_implicit_h.expandFill(idx + 1, -1);
_implicit_h[idx] = impl_h;
_atoms[idx].explicit_impl_h = true;
_invalidateVertexCache(idx);
updateEditRevision();
}
bool Molecule::isImplicitHSet(int idx)
{
return _atoms[idx].explicit_impl_h;
}
void Molecule::setPseudoAtom(int idx, const char* text)
{
_atoms[idx].number = ELEM_PSEUDO;
_atoms[idx].pseudoatom_value_idx = _pseudo_atom_values.add(text);
// TODO: take care of memory allocated here in _pseudo_atom_values
updateEditRevision();
}
int Molecule::addTemplateAtom(const char* text)
{
int idx = addAtom(ELEM_TEMPLATE);
setTemplateAtom(idx, text);
return idx;
}
void Molecule::setTemplateAtom(int idx, const char* text)
{
_atoms[idx].number = ELEM_TEMPLATE;
_atoms[idx].template_occur_idx = _template_occurrences.add();
_TemplateOccurrence& occur = _template_occurrences.at(_atoms[idx].template_occur_idx);
occur.name_idx = _template_names.add(text);
occur.seq_id = -1;
occur.template_idx = -1;
occur.contracted = DisplayOption::Undefined;
updateEditRevision();
}
int Molecule::getVacantPiOrbitals(int atom_idx, int conn, int* lonepairs_out)
{
int group = Element::group(getAtomNumber(atom_idx));
int charge = getAtomCharge(atom_idx);
int radical = getAtomRadical(atom_idx);
return BaseMolecule::getVacantPiOrbitals(group, charge, radical, conn, lonepairs_out);
}
int Molecule::getVacantPiOrbitals(int atom_idx, int* lonepairs_out)
{
return getVacantPiOrbitals(atom_idx, getAtomConnectivity(atom_idx), lonepairs_out);
}
int Molecule::getAtomConnectivity(int idx)
{
int conn = getAtomConnectivity_noImplH(idx);
if (conn < 0)
return -1;
int impl_h = getImplicitH(idx);
return impl_h + conn;
}
int Molecule::getAtomConnectivity_NoThrow(int idx, int fallback)
{
try
{
return getAtomConnectivity(idx);
}
catch (Element::Error&)
{
return fallback;
}
}
int Molecule::getAtomConnectivity_noImplH(int idx)
{
if (_connectivity.size() > idx && _connectivity[idx] >= 0)
return _connectivity[idx];
int conn = calcAtomConnectivity_noImplH(idx);
_connectivity.expandFill(idx + 1, -1);
_connectivity[idx] = conn;
return conn;
}
int Molecule::calcAtomConnectivity_noImplH(int idx)
{
const Vertex& vertex = getVertex(idx);
int i, conn = 0;
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
int order = getBondOrder(vertex.neiEdge(i));
if (order == BOND_AROMATIC)
return -1;
if (order == -1) // can happen on TautomerSuperStructure
continue;
if (order == _BOND_HYDROGEN || order == _BOND_COORDINATION)
continue;
conn += order;
}
for (i = 1; i <= attachmentPointCount(); i++)
{
int j = 0, aidx;
for (j = 0; (aidx = getAttachmentPoint(i, j)) != -1; j++)
{
if (aidx == idx)
conn++;
}
}
return conn;
}
void Molecule::calcAromaticAtomConnectivity(int idx, int& n_arom, int& min_conn)
{
const Vertex& vertex = getVertex(idx);
int i;
n_arom = 0;
min_conn = 0;
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
int order = getBondOrder(vertex.neiEdge(i));
if (order == BOND_AROMATIC)
{
min_conn++;
n_arom++;
}
else
min_conn += order;
}
if (isImplicitHSet(idx))
min_conn += getImplicitH(idx);
}
int Molecule::totalHydrogensCount()
{
int i, total_h = 0;
for (i = vertexBegin(); i < vertexEnd(); i = vertexNext(i))
{
if (getAtomNumber(i) == ELEM_H)
total_h++;
total_h += getImplicitH(i);
}
return total_h;
}
int Molecule::matchAtomsCmp(Graph& g1, Graph& g2, int idx1, int idx2, void* /*userdata*/)
{
Molecule& m1 = ((BaseMolecule&)g1).asMolecule();
Molecule& m2 = ((BaseMolecule&)g2).asMolecule();
if (m1.isPseudoAtom(idx1) && !m2.isPseudoAtom(idx2))
return 1;
if (!m1.isPseudoAtom(idx1) && m2.isPseudoAtom(idx2))
return -1;
if (m1.isTemplateAtom(idx1) && !m2.isTemplateAtom(idx2))
return 1;
if (!m1.isTemplateAtom(idx1) && m2.isTemplateAtom(idx2))
return -1;
if (m1.isRSite(idx1) && !m2.isRSite(idx2))
return 1;
if (!m1.isRSite(idx1) && m2.isRSite(idx2))
return -1;
if (m1.isAtomHighlighted(idx1) && !m2.isAtomHighlighted(idx2))
return 1;
if (!m1.isAtomHighlighted(idx1) && m2.isAtomHighlighted(idx2))
return -1;
QS_DEF(Array<int>, ai1);
QS_DEF(Array<int>, ai2);
m1.getAttachmentIndicesForAtom(idx1, ai1);
m2.getAttachmentIndicesForAtom(idx2, ai2);
if (ai1.size() != ai2.size())
return ai1.size() - ai2.size();
int i;
for (i = 0; i != ai1.size(); i++)
if (ai1[i] != ai2[i])
return ai1[i] - ai2[i];
bool pseudo = false;
if (m1.isRSite(idx1) && m2.isRSite(idx2))
{
int res = m2.getRSiteBits(idx2) - m1.getRSiteBits(idx1);
if (res != 0)
return res;
pseudo = true;
}
if (m1.isPseudoAtom(idx1) && m2.isPseudoAtom(idx2))
{
int res = strcmp(m1.getPseudoAtom(idx1), m2.getPseudoAtom(idx2));
if (res != 0)
return res;
pseudo = true;
}
else if (m1.isTemplateAtom(idx1) && m2.isTemplateAtom(idx2))
{
int res = strcmp(m1.getTemplateAtom(idx1), m2.getTemplateAtom(idx2));
if (res != 0)
return res;
pseudo = true;
}
else
{
if (m1.getAtomNumber(idx1) > m2.getAtomNumber(idx2))
return 1;
if (m1.getAtomNumber(idx1) < m2.getAtomNumber(idx2))
return -1;
}
if (m1.getAtomIsotope(idx1) > m2.getAtomIsotope(idx2))
return 1;
if (m1.getAtomIsotope(idx1) < m2.getAtomIsotope(idx2))
return -1;
if (m1.getAtomCharge(idx1) > m2.getAtomCharge(idx2))
return 1;
if (m1.getAtomCharge(idx1) < m2.getAtomCharge(idx2))
return -1;
if (!pseudo && m1.getAtomRadical(idx1) > m2.getAtomRadical(idx2))
return 1;
if (!pseudo && m1.getAtomRadical(idx1) < m2.getAtomRadical(idx2))
return -1;
return 0;
}
int Molecule::getAtomAromaticity(int idx)
{
if (_aromaticity.size() > idx && _aromaticity[idx] >= 0)
return _aromaticity[idx];
const Vertex& vertex = getVertex(idx);
int i;
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
int order = getBondOrder(vertex.neiEdge(i));
if (order == BOND_AROMATIC)
{
_aromaticity.expandFill(idx + 1, -1);
_aromaticity[idx] = ATOM_AROMATIC;
return ATOM_AROMATIC;
}
// not checking order == -1 because it is not QueryMolecule
}
_aromaticity.expandFill(idx + 1, -1);
_aromaticity[idx] = ATOM_ALIPHATIC;
return ATOM_ALIPHATIC;
}
void Molecule::_removeAtoms(const Array<int>& indices, const int* mapping)
{
int i;
for (i = 0; i < indices.size(); i++)
{
int idx = indices[i];
const Vertex& vertex = getVertex(idx);
int j;
for (j = vertex.neiBegin(); j != vertex.neiEnd(); j = vertex.neiNext(j))
{
int nei = vertex.neiVertex(j);
int order = getBondOrder(vertex.neiEdge(j));
if (mapping[nei] < 0) // the neighbor is marked for removal too
continue;
// Precalculate and store into cache number of implicit hydrogens
// This is required for correct hydrogens unfolding for molecules
// like [H]S([H])([H])C (that is seems to be invalid)
if (!isRSite(nei) && !isPseudoAtom(nei) && !isTemplateAtom(nei))
if (_implicit_h.size() <= nei || _implicit_h[nei] < 0)
getImplicitH_NoThrow(nei, -1);
if (_implicit_h.size() > nei && _implicit_h[nei] >= 0)
{
if (order == BOND_SINGLE)
_implicit_h[nei]++;
else if (order == BOND_DOUBLE)
_implicit_h[nei] += 2;
else if (order == BOND_TRIPLE)
_implicit_h[nei] += 3;
else
_implicit_h[nei] = -1;
}
if (_connectivity.size() > nei && _connectivity[nei] >= 0)
{
if (order == BOND_SINGLE)
_connectivity[nei]--;
else if (order == BOND_DOUBLE)
_connectivity[nei] -= 2;
else if (order == BOND_TRIPLE)
_connectivity[nei] -= 3;
else
_connectivity[nei] = -1;
}
}
_validateVertexConnectivity(idx, false);
}
updateEditRevision();
}
int Molecule::getImplicitH(int idx, bool impl_h_no_throw)
{
if (impl_h_no_throw)
return getImplicitH_NoThrow(idx, 0);
else
return getImplicitH(idx);
}
int Molecule::getImplicitH(int idx)
{
int conn = getAtomConnectivity_noImplH(idx);
return _getImplicitHForConnectivity(idx, conn, true);
}
int Molecule::getImplicitH_NoThrow(int idx, int fallback)
{
try
{
return getImplicitH(idx);
}
catch (Element::Error&)
{
return fallback;
}
}
int Molecule::calcImplicitHForConnectivity(int idx, int conn)
{
try
{
return _getImplicitHForConnectivity(idx, conn, false);
}
catch (Element::Error&)
{
return -1;
}
}
int Molecule::_getImplicitHForConnectivity(int idx, int conn, bool use_cache)
{
if (_atoms[idx].number == ELEM_PSEUDO)
throw Error("getImplicitH() does not work on pseudo-atoms");
if (_atoms[idx].number == ELEM_RSITE)
throw Error("getImplicitH() does not work on R-sites");
if (_atoms[idx].number == ELEM_TEMPLATE)
throw Error("getImplicitH() does not work on template atoms");
if (use_cache)
{
if (_implicit_h.size() > idx && _implicit_h[idx] >= 0)
return _implicit_h[idx];
}
const _Atom& atom = _atoms[idx];
int radical = 0;
if (_radicals.size() > idx && _radicals[idx] >= 0)
radical = _radicals[idx];
int impl_h = -1;
if (conn < 0)
{
if (getAtomAromaticity(idx) == ATOM_AROMATIC)
{
int degree = getVertex(idx).degree();
int i;
for (i = 1; i <= attachmentPointCount(); i++)
{
int j = 0, aidx;
for (j = 0; (aidx = getAttachmentPoint(i, j)) != -1; j++)
{
if (aidx == idx)
degree++;
}
}
if (atom.number == ELEM_C && atom.charge == 0)
{
if (degree == 3)
impl_h = -Element::radicalElectrons(radical);
else if (degree == 2)
impl_h = 1 - Element::radicalElectrons(radical);
}
else if (atom.number == ELEM_O && atom.charge == 0)
impl_h = 0;
else if (atom.number == ELEM_N && atom.charge == 0 && degree == 3)
impl_h = 0;
else if (atom.number == ELEM_N && atom.charge == 1 && degree == 3)
impl_h = 0;
else if (atom.number == ELEM_S && atom.charge == 0 && degree == 3)
impl_h = 0;
}
else
throw Error("internal: unsure connectivity on an aliphatic atom");
if (impl_h < 0)
{
if (_ignore_bad_valence)
impl_h = 0;
else
throw Element::Error("cannot calculate implicit hydrogens on aromatic %s, charge %d, degree %d, %d radical electrons",
Element::toString(atom.number), atom.charge, getVertex(idx).degree(), Element::radicalElectrons(radical));
}
}
else
{
if (atom.explicit_valence)
{
// Explicit valence means that the molecule was converted from Molfile.
// Conventions are that if we have explicit valence, we discard radical
// and charge when calculating implicit hydgogens.
impl_h = _valence[idx] - Element::calcValenceMinusHyd(atom.number, 0, 0, conn);
if (impl_h < 0)
{
if (_ignore_bad_valence)
impl_h = 0;
else
throw Element::Error("explicit valence %d specified on %s, but %d bonds are drawn", _valence[idx], Element::toString(atom.number), conn);
}
}
else if (isNitrogenV5(idx))
{
// special case of 5-connected nitrogen like "CN(=O)=O".
// It should really be C[N+](O-)=O, but we let people live in happy ignorance.
impl_h = 0;
}
else
{
radical = -1;
if (_radicals.size() > idx)
radical = _radicals[idx];
int valence;
if (radical == -1)
{
// no information about implicit H, not sure about radical either --
// this can happen exclusively in CML.
if (Element::calcValence(atom.number, atom.charge, 0, conn, valence, impl_h, false))
radical = 0;
else if (Element::calcValence(atom.number, atom.charge, RADICAL_SINGLET, conn, valence, impl_h, false))
radical = RADICAL_SINGLET;
else if (Element::calcValence(atom.number, atom.charge, RADICAL_DOUBLET, conn, valence, impl_h, false))
radical = RADICAL_DOUBLET;
else
{
throw Element::Error("can not calculate valence on %s, charge %d, connectivity %d", Element::toString(atom.number), atom.charge, conn);
}
if (use_cache)
{
_radicals.expandFill(idx + 1, -1);
_radicals[idx] = radical;
}
}
else
{
// no information about implicit H, but sure about radical --
// this is a commmon situtation for Molfiles or non-bracketed SMILES atoms.
// Will throw an error on 5-valent carbon and such.
if (_ignore_bad_valence)
Element::calcValence(atom.number, atom.charge, radical, conn, valence, impl_h, false);
else
Element::calcValence(atom.number, atom.charge, radical, conn, valence, impl_h, true);
}
}
}
if (use_cache)
{
_implicit_h.expandFill(idx + 1, -1);
_implicit_h[idx] = impl_h;
}
if (impl_h < 0)
throw Error("_getImplicitHForConnectivity(): internal");
return impl_h;
}
bool Molecule::isNitrogenV5(int idx)
{
int conn = getAtomConnectivity_noImplH(idx);
return isNitrogenV5ForConnectivity(idx, conn);
}
bool Molecule::isNitrogenV5ForConnectivity(int idx, int conn)
{
if (getAtomNumber(idx) != ELEM_N)
return false;
if (getAtomCharge(idx) != 0)
return false;
int radical = 0;
if (_radicals.size() > idx && _radicals[idx] >= 0)
radical = _radicals[idx];
int radical_elections = Element::radicalElectrons(radical);
return (radical_elections == 0 && conn == 5) || (radical_elections == 1 && conn == 4);
}
int Molecule::getAtomNumber(int idx)
{
return _atoms[idx].number;
}
int Molecule::getAtomCharge(int idx)
{
return _atoms[idx].charge;
}
int Molecule::getAtomIsotope(int idx)
{
return _atoms[idx].isotope;
}
bool Molecule::getIgnoreBadValenceFlag()
{
return _ignore_bad_valence;
}
void Molecule::setIgnoreBadValenceFlag(bool flag)
{
_ignore_bad_valence = flag;
}
int Molecule::getAtomValence(int idx)
{
if (_atoms[idx].number == ELEM_PSEUDO)
throw Error("getAtomValence() does not work on pseudo-atoms");
if (_atoms[idx].number == ELEM_TEMPLATE)
throw Error("getAtomValence() does not work on template atoms");
if (_atoms[idx].number == ELEM_RSITE)
throw Error("getAtomValence() does not work on R-sites");
if (_valence.size() > idx && _valence[idx] >= 0)
return _valence[idx];
if (isNitrogenV5(idx))
{
_valence.expandFill(idx + 1, -1);
_valence[idx] = 4;
return 4;
}
const _Atom& atom = _atoms[idx];
int conn = getAtomConnectivity_noImplH(idx);
if (conn < 0)
{
int min_conn, n_arom;
calcAromaticAtomConnectivity(idx, n_arom, min_conn);
int val = Element::calcValenceOfAromaticAtom(atom.number, atom.charge, n_arom, min_conn);
if (val >= 0)
{
_valence.expandFill(idx + 1, -1);
_valence[idx] = val;
return val;
}
if (_ignore_bad_valence)
{
val = min_conn;
_valence.expandFill(idx + 1, -1);
_valence[idx] = val;
return val;
}
else
throw Element::Error("can not calculate valence of %s (%d aromatic bonds, min connectivity %d, charge %d)", Element::toString(atom.number), n_arom,
min_conn, atom.charge);
}
int radical = -1;
int impl_h = -1;
int valence;
bool unusual_valence = false;
if (_radicals.size() > idx && _radicals[idx] >= 0)
radical = _radicals[idx];
if (_implicit_h.size() > idx && _implicit_h[idx] >= 0)
{
impl_h = _implicit_h[idx];
int normal_impl_h;
if (radical == -1)
{
// have implicit H count, but no information about radical. Frequently occurs in SMILES
// expressions like [CH2] or [C]
if (Element::calcValence(atom.number, atom.charge, 0, conn, valence, normal_impl_h, false) && normal_impl_h == impl_h)
radical = 0; // [SiH4]
else if (Element::calcValence(atom.number, atom.charge, RADICAL_SINGLET, conn, valence, normal_impl_h, false) && normal_impl_h == impl_h)
radical = RADICAL_SINGLET; // [CH2]
else if (Element::calcValence(atom.number, atom.charge, RADICAL_DOUBLET, conn, valence, normal_impl_h, false) && normal_impl_h == impl_h)
radical = RADICAL_DOUBLET; // [CH3]
else if (Element::calcValence(atom.number, atom.charge, 0, conn + impl_h, valence, normal_impl_h, false) && normal_impl_h == 0)
{
radical = 0; // [PH5]
valence = conn + impl_h;
unusual_valence = true;
}
else if (Element::calcValence(atom.number, atom.charge, RADICAL_SINGLET, conn + impl_h, valence, normal_impl_h, false) && normal_impl_h == 0)
{
radical = RADICAL_SINGLET;
valence = conn + impl_h;
unusual_valence = true;
}
else if (Element::calcValence(atom.number, atom.charge, RADICAL_DOUBLET, conn + impl_h, valence, normal_impl_h, false) && normal_impl_h == 0)
{
radical = RADICAL_DOUBLET; // [PH4]
valence = conn + impl_h;
unusual_valence = true;
}
else if (atom.number == ELEM_C) // [C], [CH]
{
radical = 0;
valence = conn + impl_h;
unusual_valence = true;
}
else if ((abs(atom.charge) == 1) && (atom.number == ELEM_N || atom.number == ELEM_O)) // [N+], [O-]
{
radical = 0;
valence = conn + impl_h;
unusual_valence = true;
}
if (radical != -1)
{
_radicals.expandFill(idx + 1, -1);
_radicals[idx] = radical;
}
}
else
{
// have both implicit H count and radicals -- can happen in CML or in extended SMILES
if (Element::calcValence(atom.number, atom.charge, radical, conn, valence, normal_impl_h, false) && normal_impl_h == impl_h)
;
else
{
// rare case
valence = conn + impl_h;
unusual_valence = true;
}
}
}
else
{
if (radical == -1)
{
// no information about implicit H, not sure about radical either --
// this can happen exclusively in CML.
if (Element::calcValence(atom.number, atom.charge, 0, conn, valence, impl_h, false))
radical = 0;
else if (Element::calcValence(atom.number, atom.charge, RADICAL_SINGLET, conn, valence, impl_h, false))
radical = RADICAL_SINGLET;
else if (Element::calcValence(atom.number, atom.charge, RADICAL_DOUBLET, conn, valence, impl_h, false))
radical = RADICAL_DOUBLET;
else
throw Element::Error("can not calculate valence on %s, charge %d, connectivity %d", Element::toString(atom.number), atom.charge, conn);
_radicals.expandFill(idx + 1, -1);
_radicals[idx] = radical;
}
else
{
// no information about implicit H, but sure about radical --
// this is a commmon situtation for Molfiles or non-bracketed SMILES atoms.
// Will throw an error on 5-valent carbon and such.
if (_ignore_bad_valence)
{
Element::calcValence(atom.number, atom.charge, radical, conn, valence, impl_h, false);
}
else
{
Element::calcValence(atom.number, atom.charge, radical, conn, valence, impl_h, true);
}
}
_implicit_h.expandFill(idx + 1, -1);
_implicit_h[idx] = impl_h;
}
_valence.expandFill(idx + 1, -1);
_valence[idx] = valence;
if (unusual_valence)
_atoms[idx].explicit_valence = true;
return valence;
}
int Molecule::getAtomRadical(int idx)
{
if (_atoms[idx].number == ELEM_PSEUDO)
throw Error("getAtomRadical() does not work on pseudo-atoms");
if (_atoms[idx].number == ELEM_RSITE)
throw Error("getAtomRadical() does not work on R-sites");
if (_atoms[idx].number == ELEM_TEMPLATE)
throw Error("getAtomRadical() does not work on template atoms");
if (_radicals.size() > idx && _radicals[idx] >= 0)
return _radicals[idx];
getAtomValence(idx);
if (_radicals.size() > idx && _radicals[idx] >= 0)
return _radicals[idx];
// getAtomValence() did not help: now we know that
// this is either an aromatic atom or 5-valence nitrogen;
// in any case, radical is zero.
_radicals.expandFill(idx + 1, -1);
_radicals[idx] = 0;
return 0;
}
void Molecule::saveBondOrders(Molecule& mol, Array<int>& orders)
{
orders.copy(mol._bond_orders);
}
void Molecule::loadBondOrders(Molecule& mol, Array<int>& orders)
{
mol._bond_orders.copy(orders);
mol.updateEditRevision();
}
int Molecule::getAtomSubstCount(int idx)
{
int i, res = 0;
const Vertex& vertex = getVertex(idx);
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
if (_atoms[vertex.neiVertex(i)].number != ELEM_H)
res++;
}
return res;
}
int Molecule::getAtomRingBondsCount(int idx)
{
int i, res = 0;
const Vertex& vertex = getVertex(idx);
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
if (getEdgeTopology(vertex.neiEdge(i)) == TOPOLOGY_RING)
res++;
}
return res;
}
bool Molecule::isSaturatedAtom(int idx)
{
int i;
const Vertex& vertex = getVertex(idx);
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
if (getBondOrder(vertex.neiEdge(i)) != BOND_SINGLE)
return false;
return true;
}
int Molecule::getBondOrder(int idx) const
{
return _bond_orders[idx];
}
int Molecule::getBondTopology(int idx)
{
return getEdgeTopology(idx);
}
int Molecule::getExplicitValence(int idx)
{
if (_atoms[idx].explicit_valence)
return _valence[idx];
if (_atoms[idx].number == ELEM_PSEUDO || _atoms[idx].number == ELEM_RSITE || _atoms[idx].number == ELEM_TEMPLATE)
return -1;
// try to calculate explicit valence from hydrogens, as in elemental carbon [C]
try
{
getAtomValence(idx);
}
catch (Element::Error&)
{
return -1;
}
if (_atoms[idx].explicit_valence)
return _valence[idx];
return -1;
}
bool Molecule::atomNumberBelongs(int idx, const int* numbers, int count)
{
int number = _atoms[idx].number;
int i;
for (i = 0; i < count; i++)
if (number == numbers[i])
return true;
return false;
}
bool Molecule::possibleAtomNumber(int idx, int number)
{
return _atoms[idx].number == number;
}
bool Molecule::possibleAtomNumberAndCharge(int idx, int number, int charge)
{
return _atoms[idx].number == number && _atoms[idx].charge == charge;
}
bool Molecule::possibleAtomNumberAndIsotope(int idx, int number, int isotope)
{
return _atoms[idx].number == number && _atoms[idx].isotope == isotope;
}
bool Molecule::possibleAtomIsotope(int idx, int isotope)
{
return _atoms[idx].isotope == isotope;
}
bool Molecule::possibleAtomCharge(int idx, int charge)
{
return _atoms[idx].charge == charge;
}
void Molecule::getAtomDescription(int idx, Array<char>& description)
{
_Atom& atom = _atoms[idx];
ArrayOutput output(description);
if (atom.isotope != 0)
output.printf("%d", atom.isotope);
if (isPseudoAtom(idx))
output.printf("%s", getPseudoAtom(idx));
else if (isTemplateAtom(idx))
output.printf("%s", getTemplateAtom(idx));
else
output.printf("%s", Element::toString(atom.number));
if (atom.charge == -1)
output.printf("-");
else if (atom.charge == 1)
output.printf("+");
else if (atom.charge > 0)
output.printf("+%d", atom.charge);
else if (atom.charge < 0)
output.printf("-%d", -atom.charge);
output.writeChar(0);
}
void Molecule::getBondDescription(int idx, Array<char>& description)
{
ArrayOutput output(description);
switch (_bond_orders[idx])
{
case BOND_SINGLE:
output.printf("single");
return;
case BOND_DOUBLE:
output.printf("double");
return;
case BOND_TRIPLE:
output.printf("triple");
return;
case BOND_AROMATIC:
output.printf("aromatic");
return;
}
}
bool Molecule::possibleBondOrder(int idx, int order)
{
return _bond_orders[idx] == order;
}
int Molecule::addAtom(int number)
{
int idx = _addBaseAtom();
_atoms.expand(idx + 1);
return resetAtom(idx, number);
}
int Molecule::resetAtom(int idx, int number)
{
updateEditRevision();
memset(&_atoms[idx], 0, sizeof(_Atom));
_atoms[idx].number = number;
_validateVertexConnectivity(idx, false);
return idx;
}
int Molecule::addBond(int beg, int end, int order)
{
updateEditRevision();
int idx = _addBaseBond(beg, end);
_bond_orders.expand(idx + 1);
_bond_orders[idx] = order;
_aromaticity.clear();
_aromatized = false;
_validateVertexConnectivity(beg, false);
_validateVertexConnectivity(end, false);
return idx;
}
int Molecule::addBond_Silent(int beg, int end, int order)
{
updateEditRevision();
int idx = _addBaseBond(beg, end);
_bond_orders.expand(idx + 1);
_bond_orders[idx] = order;
_aromaticity.clear();
_aromatized = false;
return idx;
}
bool Molecule::isPseudoAtom(int idx)
{
return _atoms[idx].number == ELEM_PSEUDO;
}
const char* Molecule::getPseudoAtom(int idx)
{
const _Atom& atom = _atoms[idx];
if (atom.number != ELEM_PSEUDO)
throw Error("getPseudoAtom(): atom #%d is not a pseudoatom", idx);
const char* res = _pseudo_atom_values.at(atom.pseudoatom_value_idx);
if (res == 0)
throw Error("pseudoatom string is zero");
return res;
}
bool Molecule::isTemplateAtom(int idx)
{
return _atoms[idx].number == ELEM_TEMPLATE;
}
int Molecule::getTemplateAtomOccurrence(int idx)
{
const _Atom& atom = _atoms[idx];
if (atom.number != ELEM_TEMPLATE)
throw Error("getTemplateAtomOccurrence(): atom #%d is not a template atom", idx);
return atom.template_occur_idx;
}
BaseMolecule* Molecule::neu()
{
return new Molecule();
}
bool Molecule::bondStereoCare(int idx)
{
if (!cis_trans.exists())
return false;
// In ordinary molecule all bond's stereoconfigurations are important
return cis_trans.getParity(idx) != 0;
}
bool Molecule::aromatize(const AromaticityOptions& options)
{
updateEditRevision();
bool arom_found = MoleculeAromatizer::aromatizeBonds(*this, options);
_aromatized = true;
return arom_found;
}
bool Molecule::dearomatize(const AromaticityOptions& options)
{
updateEditRevision();
return MoleculeDearomatizer::dearomatizeMolecule(*this, options);
}
int Molecule::getAtomMaxH(int idx)
{
return getAtomTotalH(idx);
}
int Molecule::getAtomMinH(int idx)
{
return getAtomTotalH(idx);
}
int Molecule::getAtomTotalH(int idx)
{
if (_total_h.size() > idx && _total_h[idx] >= 0)
return _total_h[idx];
int i, h = getImplicitH(idx);
const Vertex& vertex = getVertex(idx);
for (i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
if (getAtomNumber(vertex.neiVertex(i)) == ELEM_H)
h++;
_total_h.expandFill(idx + 1, -1);
_total_h[idx] = h;
return h;
}
bool Molecule::isRSite(int atom_idx)
{
return _atoms[atom_idx].number == ELEM_RSITE;
}
dword Molecule::getRSiteBits(int atom_idx)
{
if (_atoms[atom_idx].number != ELEM_RSITE)
throw Error("getRSiteBits(): atom #%d is not an r-site", atom_idx);
return _atoms[atom_idx].rgroup_bits;
}
void Molecule::setRSiteBits(int atom_idx, int bits)
{
if (_atoms[atom_idx].number != ELEM_RSITE)
throw Error("setRSiteBits(): atom #%d is not an r-site", atom_idx);
_atoms[atom_idx].rgroup_bits = bits;
updateEditRevision();
}
void Molecule::allowRGroupOnRSite(int atom_idx, int rg_idx)
{
if (_atoms[atom_idx].number != ELEM_RSITE)
throw Error("allowRGroupOnRSite(): atom #%d is not an r-site", atom_idx);
if (rg_idx < 1 || rg_idx > 32)
throw Error("allowRGroupOnRSite(): rgroup number %d is invalid", rg_idx);
rg_idx--;
_atoms[atom_idx].rgroup_bits |= (1 << rg_idx);
updateEditRevision();
}
void Molecule::invalidateHCounters()
{
_implicit_h.clear();
_total_h.clear();
_connectivity.clear();
}
void Molecule::checkForConsistency(Molecule& mol)
{
// Try to restore hydrogens in aromatic cycles first
mol.restoreAromaticHydrogens();
for (int i : mol.vertices())
{
if (mol.isPseudoAtom(i) || mol.isRSite(i) || mol.isTemplateAtom(i))
continue;
// check that we are sure about valence
// (if the radical is not set, it is calculated from the valence anyway)
mol.getAtomValence(i);
// check that we are sure about implicit H counter and valence
mol.getImplicitH(i);
}
}
bool Molecule::isAromatized()
{
return _aromatized;
}
bool Molecule::shouldWriteHCount(Molecule& mol, int idx)
{
return shouldWriteHCountEx(mol, idx, 0);
}
// Moved this method here to supply both Smiles and CML savers
bool Molecule::shouldWriteHCountEx(Molecule& mol, int idx, int h_to_ignore)
{
if (mol.isPseudoAtom(idx) || mol.isRSite(idx) || mol.isTemplateAtom(idx))
return false;
bool aromatic = (mol.getAtomAromaticity(idx) == ATOM_AROMATIC);
int atom_number = mol.getAtomNumber(idx);
int charge = mol.getAtomCharge(idx);
// We should write the H count if it is less than the normal (lowest valence)
// count, like in atoms with radicals.
if (mol.getAtomRadical_NoThrow(idx, -1) > 0)
return true;
// Should we write the H count for an aromatic atom or not?
// In a better world, we would have been checking that the hydrogens
// 'make difference' by de-aromatizing the molecule and comparing
// the hydrogen counts in the de-aromatized atoms with the atoms we
// are writing now.
// In the real world, de-aromatization is complicated and takes time,
// so we write hydrogen counts on all aromatic atoms, except
// uncharged C and O with no radicals, for which we can always tell
// the number of hydrogens by the number of bonds.
//
// Also handle some degerate cases with invalid valences like C[c]1(C)ccccc1
// and store implicit hydrogens for unusual situations
if (aromatic)
{
if (atom_number != ELEM_C && atom_number != ELEM_O)
return true;
if (charge != 0)
return true;
int n_arom, min_conn;
mol.calcAromaticAtomConnectivity(idx, n_arom, min_conn);
if (atom_number == ELEM_C)
{
// Ensure that there can be double bond connected
// But do not save for O=c1ccocc1
if (min_conn > 3 && mol.getVertex(idx).degree() > 3)
return true; // Unusual aromatic Carbon atom
}
if (atom_number == ELEM_O)
{
// Ensure that there can be double bond connected
if (min_conn != 2)
return true; // Unusual aromatic Oxigen atom
}
}
// We also should write the H count if it exceeds the normal (lowest valence)
// count, like in [PH5]
int normal_val, normal_hyd;
int impl_h = mol.getImplicitH_NoThrow(idx, -1);
if (impl_h >= 0)
impl_h += h_to_ignore;
if (mol.isNitrogenV5(idx))
{
normal_val = 4;
normal_hyd = 0;
}
else
{
if (impl_h < 0)
return false; // can not write an undefined H count
int conn = mol.getAtomConnectivity_noImplH(idx) - h_to_ignore;
if (conn < 0)
return false; // this is an aromatic atom -- dealed with that before
if (!Element::calcValence(atom_number, charge, 0, conn, normal_val, normal_hyd, false))
return true;
}
if (impl_h != normal_hyd)
return true;
return false;
}
void Molecule::invalidateAtom(int index, int mask)
{
BaseMolecule::invalidateAtom(index, mask);
}
bool Molecule::restoreAromaticHydrogens(bool unambiguous_only)
{
return MoleculeDearomatizer::restoreHydrogens(*this, unambiguous_only);
}
bool Molecule::standardize(const StandardizeOptions& options)
{
updateEditRevision();
return MoleculeStandardizer::standardize(*this, options);
}
bool Molecule::ionize(float ph, float ph_toll, const IonizeOptions& options)
{
updateEditRevision();
return MoleculeIonizer::ionize(*this, ph, ph_toll, options);
}
bool Molecule::isPossibleFischerProjection(const char* /*options*/)
{
if (!BaseMolecule::hasCoord(*this) || BaseMolecule::hasZCoord(*this))
return false;
for (auto i : edges())
{
if (getBondDirection(i) > 0)
return false;
}
for (auto i : vertices())
{
if ((getAtomNumber(i) == ELEM_C) && (getVertex(i).degree() == 4))
{
const Vertex& v = getVertex(i);
Vec3f& central_atom = getAtomXyz(i);
Vec3f nei_coords[4];
int nei_count = 0;
for (auto j : v.neighbors())
{
nei_coords[nei_count++] = getAtomXyz(v.neiVertex(j));
}
float angle;
Vec3f bond1, bond2;
int ncount = 0;
for (auto j = 0; j < 4; j++)
{
if (j == 3)
{
bond1.diff(nei_coords[3], central_atom);
bond1.normalize();
bond2.diff(nei_coords[0], central_atom);
bond2.normalize();
Vec3f::angle(bond1, bond2, angle);
}
else
{
bond1.diff(nei_coords[j], central_atom);
bond1.normalize();
bond2.diff(nei_coords[j + 1], central_atom);
bond2.normalize();
Vec3f::angle(bond1, bond2, angle);
}
if ((fabs(angle - M_PI / 2.f) < EPSILON) || (fabs(angle - M_PI) < EPSILON))
ncount++;
}
if (ncount == 4)
{
return true;
}
}
}
return false;
}
bool Molecule::isPiBonded(const int atom_index) const
{
const Vertex& vertex = getVertex(atom_index);
for (auto i = vertex.neiBegin(); i != vertex.neiEnd(); i = vertex.neiNext(i))
{
const int order = getBondOrder(vertex.neiEdge(i));
if (order == BOND_DOUBLE || order == BOND_TRIPLE || order == BOND_AROMATIC)
{
return true;
}
}
return false;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif