// core/indigo-core/molecule/query_molecule.h (332 lines of code) (raw):
/****************************************************************************
* Copyright (C) from 2009 to Present EPAM Systems.
*
* This file is part of Indigo toolkit.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
#ifndef __query_molecule_h__
#define __query_molecule_h__
#include "base_cpp/ptr_array.h"
#include "molecule/base_molecule.h"
#include "molecule/molecule_3d_constraints.h"
#include "molecule/molecule_arom.h"
#include <memory>
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4251)
#endif
namespace indigo
{
// Sentinel integer (-1).
// NOTE(review): the name suggests it marks a value that is not known /
// not determined; confirm the exact meaning against its usages.
constexpr int VALUE_UNKNOWN{-1};
// Single-bit flags (bits 8..12) that can be OR-ed together into one int mask.
// NOTE(review): presumably combined into the `skip_flags` argument of the
// merge routines to leave out the named parts of a query molecule; the low
// eight bits are left free, presumably for flags declared elsewhere - confirm.
enum
{
SKIP_3D_CONSTRAINTS = 1 << 8,
SKIP_FIXED_ATOMS = 1 << 9,
SKIP_RGROUPS = 1 << 10,
SKIP_AROMATICITY = 1 << 11,
SKIP_COMPONENTS = 1 << 12
};
// Atom label categories, numbered consecutively from 0.
// The numeric values are spelled out so that they stay stable if an entry is
// ever inserted in the middle of the list.
// NOTE(review): presumably the result codes of the atom-label classification
// (R-site, the A/X/Q/M wildcards and their *H variants, lists, pseudo atoms,
// template atoms, plain elements) - confirm against the users of these names.
enum
{
_ATOM_R = 0,
_ATOM_A = 1,
_ATOM_X = 2,
_ATOM_Q = 3,
_ATOM_M = 4,
_ATOM_AH = 5,
_ATOM_XH = 6,
_ATOM_QH = 7,
_ATOM_MH = 8,
_ATOM_LIST = 9,
_ATOM_NOTLIST = 10,
_ATOM_PSEUDO = 11,
_ATOM_TEMPLATE = 12,
_ATOM_ELEMENT = 13
};
// Forward declaration: this header only ever names Output by reference
// (Output&), so the full class definition is not needed here.
class Output;
// Query molecule: a BaseMolecule whose atoms and bonds are described by
// constraint trees (see Node, Atom and Bond below). Inner tree nodes are the
// logical operators OP_AND / OP_OR / OP_NOT; leaves are ATOM_*** or BOND_***
// constraints.
// Apart from Node::_optimize(), every member is only declared here; the
// definitions live outside this header.
class DLLEXPORT QueryMolecule : public BaseMolecule
{
public:
// Kind of a constraint-tree node: a logical operator (OP_***), an atom
// constraint (ATOM_***) or a bond constraint (BOND_***).
// The enumerators are numbered implicitly, so inserting or reordering
// entries changes the numeric value of everything that follows.
enum OpType
{
OP_NONE, // used on totally unconstrained atoms
OP_AND,
OP_OR,
OP_NOT,
ATOM_NUMBER,
ATOM_PSEUDO,
ATOM_RSITE,
ATOM_CHARGE,
ATOM_ISOTOPE,
ATOM_RADICAL,
ATOM_VALENCE,
// ATOM_DEGREE,
ATOM_CONNECTIVITY,
ATOM_TOTAL_BOND_ORDER,
ATOM_TOTAL_H,
ATOM_IMPLICIT_H,
ATOM_SUBSTITUENTS,
ATOM_SUBSTITUENTS_AS_DRAWN,
ATOM_SSSR_RINGS,
ATOM_SMALLEST_RING_SIZE,
ATOM_RING_BONDS,
ATOM_RING_BONDS_AS_DRAWN,
ATOM_UNSATURATION,
ATOM_FRAGMENT,
ATOM_AROMATICITY,
ATOM_TEMPLATE,
ATOM_TEMPLATE_SEQID,
ATOM_TEMPLATE_CLASS,
ATOM_PI_BONDED,
ATOM_CHIRALITY,
BOND_ORDER,
BOND_TOPOLOGY,
BOND_ANY,
HIGHLIGHTING
};
// Chirality classes (implicitly numbered from 0).
enum
{
CHIRALITY_GENERAL,
CHIRALITY_TETRAHEDRAL,
CHIRALITY_ALLENE_LIKE,
CHIRALITY_SQUARE_PLANAR,
CHIRALITY_TRIGONAL_BIPYRAMIDAL,
CHIRALITY_OCTAHEDRAL,
};
// Upper limits, one per chirality class above (CHIRALITY_GENERAL has none).
// NOTE(review): the numbers coincide with the permutation counts of the
// SMILES chirality classes (TH 1-2, AL 1-2, SP 1-3, TB 1-20, OH 1-30), so
// they are presumably the largest valid permutation index - confirm.
static constexpr int CHIRALITY_TETRAHEDRAL_MAX = 2;
static constexpr int CHIRALITY_ALLENE_LIKE_MAX = 2;
static constexpr int CHIRALITY_SQUARE_PLANAR_MAX = 3;
static constexpr int CHIRALITY_TRIGONAL_BIPYRAMIDAL_MAX = 20;
static constexpr int CHIRALITY_OCTAHEDRAL_MAX = 30;
// Chirality values. CHIRALITY_OR_UNSPECIFIED is a separate high bit
// (0x100) that lies above every CHIRALITY_*_MAX, so it cannot collide with
// a plain value.
enum
{
CHIRALITY_ANTICLOCKWISE = 1,
CHIRALITY_CLOCKWISE = 2,
CHIRALITY_OR_UNSPECIFIED = 0x100 // should be twice bigger than any of CHIRALITY_*_MAX
};
// Base class of the constraint-tree nodes; Atom and Bond derive from it.
// It is abstract: the underscore-prefixed hooks in the protected section
// are pure virtual and are implemented by Atom and Bond.
class DLLEXPORT Node
{
public:
// Note the parameter is a plain int while the stored `type` is an OpType.
Node(int type_);
virtual ~Node();
OpType type; // OP_*** or ATOM_*** or BOND_***
// type is OP_NOT: one child
// type is OP_AND or OP_OR: more than one child
// otherwise: no children
PtrArray<Node> children;
// Check if node has any constraint of the specific type
bool hasConstraint(int what_type);
// Check if there is no other constraint, except specified ones
bool hasNoConstraintExcept(int what_type);
bool hasNoConstraintExcept(int what_type1, int what_type2);
// NOTE(review): the vector is taken by value, so each call copies it.
bool hasNoConstraintExcept(std::vector<int> what_types);
// Remove all constraints of the given type
void removeConstraints(int what_type);
// Queries over the constraint tree for the property `what_type`.
// NOTE(review): judging by the names, sureValue* ask whether the tree pins
// the property down (writing the result to `value`), possibleValue* ask
// whether a value is still allowed, and the *Inv variants evaluate the
// same question under negation - confirm against the implementation.
bool sureValue(int what_type, int& value) const;
bool sureValueInv(int what_type, int& value) const;
bool possibleValue(int what_type, int what_value);
bool possibleValueInv(int what_type, int what_value);
bool possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2);
bool possibleValuePairInv(int what_type1, int what_value1, int what_type2, int what_value2);
// `arr` points to `count` candidate values.
bool sureValueBelongs(int what_type, const int* arr, int count);
bool sureValueBelongsInv(int what_type, const int* arr, int count);
// NOTE(review): presumably true when an OP_OR node occurs in this tree.
bool hasOP_OR();
// Optimize query for faster substructure search
void optimize();
protected:
// "neu" means "new" in German. This should have been a static
// method, but static methods can not be virtual, and so it is not static.
virtual Node* _neu() = 0;
// Untyped AND / OR / NOT combinators; Atom and Bond declare typed
// und / oder / nicht counterparts.
// NOTE(review): ownership of the raw pointers passed in and returned is not
// visible from this header - confirm before use.
static Node* _und(Node* node1, Node* node2);
static Node* _oder(Node* node1, Node* node2);
static Node* _nicht(Node* node);
// Per-node-kind hooks (implemented by Atom and Bond) that mirror the
// public possibleValue / possibleValuePair / sureValue / sureValueBelongs
// queries.
virtual bool _possibleValue(int what_type, int what_value) = 0;
virtual bool _possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2) = 0;
Node* _findSureConstraint(int what_type, int& count);
virtual bool _sureValue(int what_type, int& value_out) const = 0;
virtual bool _sureValueBelongs(int what_type, const int* arr, int count) = 0;
// Optimization hook; the default does nothing. Atom overrides it.
virtual void _optimize(){};
};
// Constraint-tree node for atoms.
class DLLEXPORT Atom : public Node
{
public:
Atom();
// NOTE(review): presumably a single-value constraint
// (value_min == value_max == value) - confirm.
Atom(int type, int value);
// Constraint with separate lower and upper bounds.
Atom(int type, int value_min, int value_max);
// Text-valued constraint; NOTE(review): presumably stored in `alias`.
Atom(int type, const char* value);
// Fragment-valued constraint. NOTE(review): `fragment` is a unique_ptr, so
// this constructor presumably takes ownership of `value` - confirm.
Atom(int type, QueryMolecule* value);
~Atom() override;
// NOTE(review): returns a raw pointer; presumably a new copy of the tree
// that the caller must own - confirm.
Atom* clone() const;
void copy(const Atom& other);
// Typed access to a child node by index.
Atom* child(int idx);
// NOTE(review): presumably tests value_min <= value <= value_max.
bool valueWithinRange(int value);
bool hasConstraintWithValue(int what_type, int what_value);
bool updateConstraintWithValue(int what_type, int new_value);
Atom* sureConstraint(int what_type);
// Bounds of the constraint value.
int value_min;
int value_max;
int occurrence_idx;
// available only when type is ATOM_PSEUDO or ATOM_TEMPLATE or ATOM_TEMPLATE_CLASS
Array<char> alias;
// available only when type is ATOM_FRAGMENT
std::unique_ptr<QueryMolecule> fragment;
// when type is ATOM_RSITE, the value (value_min=value_max)
// are 32 bits, each allowing an r-group with corresponding number
// to go for this atom. Simple 'R' atoms have this field equal to zero.
// "und" means "and" in German. "and" is a C++ keyword.
static Atom* und(Atom* atom1, Atom* atom2);
// "oder" means "or" in German. "or" is a C++ keyword.
static Atom* oder(Atom* atom1, Atom* atom2);
// "nicht" means "not" in German. "not" is a C++ keyword.
static Atom* nicht(Atom* atom);
protected:
// Implementations of the Node hooks for atom constraints.
Node* _neu() override;
bool _possibleValue(int what_type, int what_value) override;
bool _possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2) override;
bool _sureValue(int what_type, int& value_out) const override;
bool _sureValueBelongs(int what_type, const int* arr, int count) override;
void _optimize() override;
// DECL_ERROR is a macro defined outside this file.
DECL_ERROR;
};
// Constraint-tree node for bonds. Unlike Atom it does not override
// _optimize(), so it keeps the empty default from Node.
class DLLEXPORT Bond : public Node
{
public:
Bond();
Bond(int type_);
Bond(int type_, int value_);
Bond(int type_, int value_, int direction_);
~Bond() override;
int value;
int direction;
// NOTE(review): returns a raw pointer; presumably a new copy of the tree
// that the caller must own - confirm. Not const, unlike Atom::clone().
Bond* clone();
// Typed access to a child node by index.
Bond* child(int idx);
// "und" means "and" in German. "and" is a C++ keyword.
static Bond* und(Bond* node1, Bond* node2);
// "oder" means "or" in German. "or" is a C++ keyword.
static Bond* oder(Bond* node1, Bond* node2);
// "nicht" means "not" in German. "not" is a C++ keyword.
static Bond* nicht(Bond* node);
protected:
// Implementations of the Node hooks for bond constraints.
Node* _neu() override;
bool _possibleValue(int what_type, int what_value) override;
bool _possibleValuePair(int what_type1, int what_value1, int what_type2, int what_value2) override;
bool _sureValue(int what_type, int& value_out) const override;
bool _sureValueBelongs(int what_type, const int* arr, int count) override;
};
QueryMolecule();
~QueryMolecule() override;
// BaseMolecule overrides: lifecycle and type identification.
void clear() override;
BaseMolecule* neu() override;
QueryMolecule& asQueryMolecule() override;
bool isQueryMolecule() override;
// Static classification helpers, each available for a query node and for a
// plain int. NOTE(review): `num` is presumably an element number - confirm.
static bool isAromaticByCaseAtom(QueryMolecule::Node* atom);
static bool isAromaticByCaseAtom(int num);
static bool isOrganicSubset(QueryMolecule::Atom* atom);
static bool isOrganicSubset(int num);
// Per-atom property accessors; `idx` is an atom index. Those marked
// `override` implement the BaseMolecule interface.
int getAtomNumber(int idx) override;
int getAtomCharge(int idx) override;
int getAtomIsotope(int idx) override;
int getAtomRadical(int idx) override;
int getExplicitValence(int idx) override;
void setExplicitValence(int idx, int valence) override;
int getAtomAromaticity(int idx) override;
int getAtomValence(int idx) override;
int getAtomSubstCount(int idx) override;
int getAtomRingBondsCount(int idx) override;
int getAtomConnectivity(int idx) override;
int calcAtomMaxH(int idx, int conn);
int getAtomMaxH(int idx) override;
int getAtomMinH(int idx) override;
int getAtomTotalH(int idx) override;
int getAtomConnectedH(int idx);
// Pseudo atoms, template atoms and R-sites.
bool isPseudoAtom(int idx) override;
const char* getPseudoAtom(int idx) override;
int addTemplateAtom(const char* text) override;
bool isTemplateAtom(int idx) override;
int getTemplateAtomOccurrence(int idx) override;
bool isRSite(int atom_idx) override;
dword getRSiteBits(int atom_idx) override;
void allowRGroupOnRSite(int atom_idx, int rg_idx) override;
bool isSaturatedAtom(int idx) override;
// Bond accessors; `idx` is a bond index.
int getBondOrder(int idx) const override;
int getBondTopology(int idx) override;
// "Could this atom / bond match ...?" queries.
bool atomNumberBelongs(int idx, const int* numbers, int count) override;
bool possibleAtomNumber(int idx, int number) override;
bool possibleAtomNumberAndCharge(int idx, int number, int charge) override;
bool possibleAtomNumberAndIsotope(int idx, int number, int isotope) override;
bool possibleAtomIsotope(int idx, int number) override;
bool possibleAtomCharge(int idx, int charge) override;
// Introduced as a new virtual here (it is not marked `override`).
virtual bool possibleAtomRadical(int idx, int radical);
// Write a textual description of the atom / bond into `description`.
void getAtomDescription(int idx, Array<char>& description) override;
void getBondDescription(int idx, Array<char>& description) override;
bool possibleBondOrder(int idx, int order) override;
bool possibleAromaticBond(int idx);
bool possibleNitrogenV5(int idx);
// SMARTS helpers: the get*Str forms return a std::string, the write* forms
// emit to an Output.
static std::string getSmartsBondStr(QueryMolecule::Bond* bond);
static void writeSmartsBond(Output& output, QueryMolecule::Bond* bond, bool has_or_parent);
static std::string getSmartsAtomStr(QueryMolecule::Atom* atom, int original_format, bool is_substr = true);
static std::string getMolMrvSmaExtension(QueryMolecule& qm, int aid);
static void writeSmartsAtom(Output& output, Atom* atom, int aam, int chirality, int depth, bool has_or_parent, bool has_not_parent,
int original_format);
// Query atom categories. QUERY_ATOM_UNKNOWN is -1; the rest count up from
// QUERY_ATOM_A = 0.
// NOTE(review): presumably the values returned (as int) by parseQueryAtom
// and accepted by queryAtomIsSpecial(int) - confirm.
enum QUERY_ATOM
{
QUERY_ATOM_UNKNOWN = -1,
QUERY_ATOM_A = 0,
QUERY_ATOM_X,
QUERY_ATOM_Q,
QUERY_ATOM_M,
QUERY_ATOM_AH,
QUERY_ATOM_XH,
QUERY_ATOM_QH,
QUERY_ATOM_MH,
QUERY_ATOM_LIST,
QUERY_ATOM_NOTLIST,
QUERY_ATOM_SINGLE
};
// Static analysis helpers for query atoms.
static bool isKnownAttr(QueryMolecule::Atom& qa);
static bool isNotAtom(QueryMolecule::Atom& qa, int elem);
// NOTE(review): returns a raw pointer; ownership is not visible from here.
static QueryMolecule::Atom* stripKnownAttrs(QueryMolecule::Atom& qa);
// `list` and `notList` are output parameters.
static bool collectAtomList(Atom& qa, Array<int>& list, bool& notList);
static int parseQueryAtom(QueryMolecule::Atom& qa, Array<int>& list);
static int parseQueryAtom(QueryMolecule& qm, int aid, Array<int>& list);
static bool queryAtomIsRegular(QueryMolecule& qm, int aid);
static bool queryAtomIsSpecial(QueryMolecule& qm, int aid);
static bool queryAtomIsSpecial(int query_atom_type);
// Static analysis helpers for query bonds.
static Bond* getBondOrderTerm(Bond& qb, bool& complex);
static bool isOrBond(Bond& qb, int type1, int type2);
static bool isSingleOrDouble(Bond& qb);
static bool isSmartsEmptyBond(Bond& qb);
static int getQueryBondType(Bond& qb);
static int getQueryBondType(Bond& qb, int& direction, bool& negative);
static int getAtomType(const char* label);
static void getQueryAtomLabel(int qa, Array<char>& result);
// NOTE(review): returns a raw pointer; presumably a newly allocated Bond
// owned by the caller - confirm.
static QueryMolecule::Bond* createQueryMoleculeBond(int order, int topology, int direction);
bool bondStereoCare(int idx) override;
void setBondStereoCare(int idx, bool stereo_care);
bool aromatize(const AromaticityOptions& options) override;
bool dearomatize(const AromaticityOptions& options) override;
// Atom / bond storage access by raw pointer.
// NOTE(review): the add*/reset* calls presumably hand ownership of the
// pointer to the molecule (the storage is a PtrArray) and release* hands it
// back to the caller - confirm.
int addAtom(Atom* atom);
Atom& getAtom(int idx);
Atom* releaseAtom(int idx);
void resetAtom(int idx, Atom* atom);
static bool isAtomProperty(OpType type);
Bond& getBond(int idx);
Bond* releaseBond(int idx);
void resetBond(int idx, Bond* bond);
int addBond(int beg, int end, Bond* bond);
// BaseMolecule overrides that build atoms / bonds from plain values.
int addAtom(int label) override;
int addBond(int beg, int end, int order) override;
int addBond_Silent(int beg, int end, int order) override;
int getImplicitH(int idx, bool impl_h_no_throw) override;
void setImplicitH(int idx, int impl_h) override;
void optimize();
// Public data members.
Molecule3dConstraints spatial_constraints;
Array<int> fixed_atoms;
QueryMoleculeAromaticity aromaticity;
Array<char> fragment_smarts;
// for component-level grouping of SMARTS
// components[i] = 0 means nothing;
// components[i] = components[j] > 0 means that i-th and j-th vertices
// must belong to the same connected component of the target molecule;
// components[i] != components[j] > 0 means that i-th and j-th vertices
// must belong to different connected components of the target molecule
Array<int> components;
// Fills `componentNeighbors` (output parameter).
void getComponentNeighbors(std::list<std::unordered_set<int>>& componentNeighbors);
void invalidateAtom(int index, int mask) override;
// "Exteral" [sic]: the misspelling is part of the public name, so it has to
// stay until every caller can be renamed together.
int getAtomMaxExteralConnectivity(int idx);
// Public despite the leading underscore.
int _calcAtomConnectivity(int idx);
bool standardize(const StandardizeOptions& options);
// `list` and `properties` are output parameters.
static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<std::unique_ptr<Atom>>& list, std::map<int, std::unique_ptr<Atom>>& properties);
protected:
// Recursive workers behind getAtomDescription / getBondDescription.
void _getAtomDescription(Atom* atom, Output& out, int depth);
static void _getAtomChiralityDescription(Atom* atom, Output& output);
void _getBondDescription(Bond* bond, Output& out);
int _getAtomMinH(Atom* atom);
// BaseMolecule structural-edit hooks.
void _flipBond(int atom_parent, int atom_from, int atom_to) override;
void _mergeWithSubmolecule(BaseMolecule& bmol, const Array<int>& vertices, const Array<int>* edges, const Array<int>& mapping, int skip_flags) override;
void _postMergeWithSubmolecule(BaseMolecule& bmol, const Array<int>& vertices, const Array<int>* edges, const Array<int>& mapping,
int skip_flags) override;
void _removeAtoms(const Array<int>& indices, const int* mapping) override;
void _removeBonds(const Array<int>& indices) override;
// A bool flag paired with a set of ints.
// NOTE(review): presumably (is-negated, element numbers) - confirm.
using AtomList = std::pair<bool, std::set<int>>;
static bool _isAtomListOr(Atom* pqa, std::vector<std::unique_ptr<Atom>>& list);
static bool _isAtomOrListAndProps(Atom* pqa, std::vector<std::unique_ptr<Atom>>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties);
// NOTE(review): `list` is taken by value, so anything the callee stores in
// it is not visible to the caller - confirm this is intended.
static bool _isAtomList(Atom* qa, AtomList list);
static bool _tryToConvertToList(Atom* p_query_atom, std::vector<std::unique_ptr<Atom>>& atoms, std::map<int, std::unique_ptr<Atom>>& properties);
// Per-atom and per-bond state.
// NOTE(review): _min_h is presumably indexed by atom and _bond_stereo_care
// by bond - confirm.
Array<int> _min_h;
Array<bool> _bond_stereo_care;
// Constraint trees of all atoms and bonds.
PtrArray<Atom> _atoms;
PtrArray<Bond> _bonds;
};
} // namespace indigo
#ifdef _WIN32
#pragma warning(pop)
#endif
#endif