core/indigo-core/graph/max_common_subgraph.h (507 lines of code) (raw):

/**************************************************************************** * Copyright (C) from 2009 to Present EPAM Systems. * * This file is part of Indigo toolkit. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ #ifndef _max_common_subgraph #define _max_common_subgraph #include "base_cpp/cancellation_handler.h" #include "base_cpp/d_bitset.h" #include "base_cpp/obj_list.h" #include "base_cpp/output.h" #include "base_cpp/red_black.h" #include "base_cpp/tlscont.h" #include "graph/embedding_enumerator.h" #include "graph/graph.h" #include "math.h" #ifdef _WIN32 #pragma warning(push) #pragma warning(disable : 4251) #endif namespace indigo { class DLLEXPORT MaxCommonSubgraph { public: DECL_ERROR; MaxCommonSubgraph(Graph& subgraph, Graph& supergraph); ~MaxCommonSubgraph(); void setGraphs(Graph& subgraph, Graph& supergraph); // two main methods for maximum common subgraph search // exact searching mcs method. Hanser's algorithm used void findExactMCS(); // approximate method for searching mcs. 2DOM algorithm used void findApproximateMCS(); // parameters for exact method struct ParametersForExact { // boolean true if method reached max iteration number bool isStopped; // max iteration number int maxIteration; // number of solutions that are found by exact algorithm int numberOfSolutions; // throw error if input map is incorrect bool throw_error_for_incorrect_map; }; // parameters for approximate algorithm struct ParametersForApproximate { // error is the number of unmatched edges in solution int error; // max iteration number. int maxIteration; // number of solutions (connected graphs in solution given by algorithm 2DOM) int numberOfSolutions; // if this parameter set to false then only max solution would be writen. true - then all solution and at first place max solution bool randomize; bool standardRandom; }; // two callbacks for edge and vertices matching bool (*conditionEdgeWeight)(Graph& graph1, Graph& graph2, int i, int j, void* userdata); bool (*conditionVerticesColor)(Graph& graph1, Graph& graph2, const int* core_sub, int i, int j, void* userdata); // parameters for mcs methods ParametersForExact parametersForExact; ParametersForApproximate parametersForApproximate; // array for accept input mapping and working with it Array<int> incomingMap; // this method sorts solutions and maximizes number of the rings in graph static int ringsSolutionTerm(Array<int>&, Array<int>&, void*); // returns all maps-solutions-mcs void getSolutionMaps(ObjArray<Array<int>>* v_maps, ObjArray<Array<int>>* e_maps) const; // returns first element in sorted solution array void getMaxSolutionMap(Array<int>* v_map, Array<int>* e_map) const; // callback for sorting solutions (see _vertEdgeSolMap) int (*cbSolutionTerm)(Array<int>& array1, Array<int>& array2, void* userdata); // context for all callbacks (edge and vertices matching and sort solutions void* userdata; // callback for managing/ if return 0 then algorithm will end its work int (*cbEmbedding)(const int* sub_vert_map, const int* sub_edge_map, const void* info, void* userdata); void* embeddingUserdata; // Exact method: Hanser's algorithm //------------------------------------------------------------------------------------------------------------------- // class represent node of the resolution graph (ReGraph) An RePoint represents an association // betwwen two edges of the source graphs G1 and G2 that are compared class RePoint { public: // creates RePoint for input edge ids RePoint(int n1, int n2); // gets edge id in first graph int getid1() const { return _id1; }; // gets edge id in second graph int getid2() const { return _id2; }; // set of neighbour nodes in the ReGraph Dbitset extension; // set of incompatible nodes in the ReGraph Dbitset forbidden; Dbitset allowed_g1; Dbitset allowed_g2; // sets sizes for all containers void setSizes(int size, int size_g1, int size_g2); private: // number of the edge in the graph 1 int _id1; // number of the edge in the graph 2 int _id2; RePoint(const RePoint&); // no implicit copy }; // solution structure for Resolution graph class Solution { public: Solution(){}; int numBits; Dbitset reSolution; Dbitset solutionProj1; Dbitset solutionProj2; private: Solution(const Solution&); // no implicit copy }; // This class implements the Resolution Graph (ReGraph). // The ReGraph is a graph based representation of the search problem. // An ReGraph is constructred from the two compared graphs (G1 and G2). // Each vertex (node) in the ReGraph represents a possible association // from an edge in G1 with an edge in G2. Thus two compatible edges // in two graphs are represented by a vertex in the ReGraph. // Each edge in the ReGraph corresponds to a common adjacency relationship // between the 2 couple of compatible edges associated to the 2 ReGraph nodes // forming this edge. // Resolution Graph class ReGraph { public: ReGraph(); ReGraph(MaxCommonSubgraph& context); // clears resolution graph void clear(); // sets maximum iterations number void setMaxIteration(int m) { _maxIteration = m; }; // set sizes for util variables void setSizes(int n1, int n2); // adds new RePoint to nodes set void addPoint(int id1, int id2) { _graph.add(new RePoint(id1, id2)); }; // main method to perform a query // Parsing of the ReGraph. This is the recursive method // to perform a query. The method will recursively // parse the RGraph thru connected nodes and visiting the // RGraph using allowed adjacency relationship. void parse(bool findAllStructure); // retruns index of RePoint which corespondes to input edges ids int getPointIndex(int i, int j) const; // returns number of nodes (RePoints) in resolution graph int size() const { return _graph.size(); }; // returns true if algorithm has reached maximum iteration bool stopped() { return _stop; }; // gets RePoint with index i RePoint* getPoint(int i) { return _graph[i]; }; // solution getters // begin solution index int solBegin() const { return _solutionObjList.begin(); }; // next solution index int solNext(int index) const { return _solutionObjList.next(index); }; // return false then it is no more solutions bool solIsNotEnd(int index) const { return (index != _solutionObjList.end()); }; // solutions store capacity int solutionSize() const { return _solutionObjList.size(); }; // returns solution list const Dbitset& getSolBitset(int index) const { return _solutionObjList[index].reSolution; }; // retruns project of solution list to graph 1 const Dbitset& getProj1Bitset(int index) const { return _solutionObjList[index].solutionProj1; }; // retruns project of solution list to graph 2 const Dbitset& getProj2Bitset(int index) const { return _solutionObjList[index].solutionProj2; }; void insertSolution(int ins_index, bool ins_after, const Dbitset& sol, const Dbitset& sol_g1, const Dbitset& sol_g2, int num_bits); // callback for managing/ if return 0 then algorithm will end its work int (*cbEmbedding)(const int* sub_vert_map, const int* sub_edge_map, const void* info, void* userdata); void* userdata; std::shared_ptr<CancellationHandler> cancellation_handler; protected: // list of ReGraph nodes each node keeping track of its neighbours PtrArray<RePoint> _graph; // size of ReGRaph int _size; // current number of iterations int _nbIteration; // maximal number of iterations before search break int _maxIteration; // dimensions of the compared graphs int _firstGraphSize; int _secondGraphSize; // flag to define if we want to get all possible 'structures' bool _findAllStructure; // flag to define if search was breaking bool _stop; // Checks if a potantial solution is a real one // (not included in a previous solution) // and add this solution to the solution list // in case of success. void _solution(const Dbitset& traversed, Dbitset& trav_g1, Dbitset& trav_g2); // Determine if there are potential soltution remaining. bool _mustContinue(const Dbitset& pnode_g1, const Dbitset& pnode_g2) const; // solution bitset store's parameters Pool<ObjList<Solution>::Elem> _pool; ObjList<Solution> _solutionObjList; private: ReGraph(const ReGraph&); // no implicit copy }; // Create Resolution Graph for MCSS class ReCreation { public: ReCreation(ReGraph& rgr, MaxCommonSubgraph& context); // creates resolution graph void createRegraph(); // method change input array to map which corresponds to list of ReGraph nodes (they are in bitset) void setCorrespondence(const Dbitset& b, Array<int>& map) const; /* * Inserts solution from the given mapping */ bool insertSolution(const Array<int>& mapping); // sets input mapping to algorithm using bool setMapping(); // creates all solutions int createSolutionMaps(); // retruns all solutions edge and vertices lists void getSolutionListsSub(ObjArray<Array<int>>& v_lists, ObjArray<Array<int>>& e_lists) const; void getSolutionListsSuper(ObjArray<Array<int>>& v_lists, ObjArray<Array<int>>& e_lists) const; protected: // resolution graph to work with ReGraph& _regraph; // max common subgraph as context MaxCommonSubgraph& _context; // creates nodes of resolution graph void _nodeConstructor(); // creates edges of resolution graph void _edgesConstructor(); // returns common vertex id of two edges in graph. returns -1 if edges hasn't it int _getCommonVertex(int e1, int e2, Graph& graph) const; // returns true if two edges in graph has common vertex bool _hasCommonVertex(int e1, int e2, Graph& graph) const; // returns true if common vertices are matched bool _hasCommonSymbol(int e11, int e12, int e21, int e22) const; // returns edge and vertices list void _createList(const Dbitset& proj_bitset, Graph& graph, Array<int>& v_list, Array<int>& e_list) const; private: ReCreation(const ReCreation&); // no implicit copy }; // Approximate algorithm: two stage optimization method (2DOM) //------------------------------------------------------------------------------------------------------------------- // this class is main util for keeping adjacancy matrix and other parameters for fast work of 2DOM algorithm // Adjacency matrix class AdjMatricesStore { public: AdjMatricesStore(MaxCommonSubgraph& context, int maxsize); // creates utilite store void create(Graph& g1, Graph& g2); // creates all solutions int createSolutionMaps(); // retruns size of first graph to compared int getFirstSize() { return _size1; } // returns size of second graph to campare int getSecondSize() { return _size2; } // returns element with input indexes of adjacency matrix of second graph bool getSecondElement(int i, int j) { return _aj2[i]->at(j); } // retruns elements of utilites matrices which are stored matched edges int getFirstIdxEdge(int i, int j) { return _ajEdge1[i]->at(j); } int getSecondIdxEdge(int i, int j) { return _ajEdge2[i]->at(j); } // retruns degree of vertex in first graph adj matrix int getFirstVDegree(int i) { return _degreeVec1[i]; } // retruns degree of vertex in first graph adj matrix int getSecondVDegree(int i) { return _degreeVec2[i]; } // returns number of unmatched edges int countErrorAtEdges(int i, int j); // returns color and weight conditions if it is presented in max common subgraph context // methods takes account to corresponding between input graph ids and its equals // this for two graphs bool getEdgeWeightCondition(int i, int j); // this for two graphs bool getVerticesColorCondition(int i, int j); // this for one graph (first) bool getVColorOneCondition(int i, int j); // returns dbitset represent row in adjacency matrix of first graph Dbitset* getFirstRow(int i) { return _daj1[i]; } // returns dbitset represent row in adjacency matrix of first graph Dbitset* getSecondRow(int i) { return _daj2[i]; } // retruns solution correspondings between two graphs int* getX() { return _x.ptr(); } int* getY() { return _y.ptr(); } void getSolutions(ObjArray<Array<int>>& v_maps); // returns correspondence parameters between each vertex and vertex in other graph with the same label int getFLSize(int i) { return _mLabel1[i]->size(); } int getFLV(int i, int j) { return _mLabel1[i]->at(j); } // context includes input parameters and output solution MaxCommonSubgraph& _context; protected: // sizes of graphs to compared int _size1; int _size2; // adjacency matrix // PtrArray< Array<bool> > _aj1; PtrArray<Array<bool>> _aj2; // indexes of edges PtrArray<Array<int>> _ajEdge1; PtrArray<Array<int>> _ajEdge2; // correspondence between each vertex and vertex in other graph with the same label PtrArray<Array<int>> _mLabel1; // adjacency matrix in bitset view PtrArray<Dbitset> _daj1; PtrArray<Dbitset> _daj2; // correspondence between two graphs Array<int> _x; Array<int> _y; // correspondence between real graph and matrix Array<int> _cr1; Array<int> _cr2; // degree vectors Array<int> _degreeVec1; Array<int> _degreeVec2; // matrix with not corresponding edges PtrArray<Array<int>> _errorEdgesMatrix; // maps Array<int> _map; Array<int> _invmap; // max size to reserve space in arrays int _maxsize; // true if two input graphs was swapped then initializes bool _swap; // sets adjacency matrix elements for two graphs void _setFirstElement(int i, int j, int value); void _setSecondElement(int i, int j, int value); // returns reverse correspondence between input graphs and its adj matrices int _getFirstC(int x); int _getSecondC(int x); // creation of matrices void _createCorrespondence(); void _createLabelMatrices(); void _createAdjacencyMatrices(); void _createErrorEdgesMatrix(); void _createMaps(); // creation of graph for solution void _createConnectedGraph(Graph& graph, Array<int>& map_gr); // two graphs to compare Graph* _graph1; Graph* _graph2; // retruns false if there is no need to swap graphs bool _checkSize(Graph& g1, Graph& g2); // makes invert map void _makeInvertMap(Array<int>& map, Array<int>& invmap); private: AdjMatricesStore(const AdjMatricesStore&); // no implicit copy }; // Construction stage: greedy method class Greedy { public: Greedy(AdjMatricesStore& aj); // main method in greedy stage of 2DOM algorithm void greedyMethod(); private: // creates lists if vertices void _createLgLh(); // returns number of matched edges int _matchedEdges(); // keeping util class for 2DOM method AdjMatricesStore& _adjMstore; // list of unsigned vertices in first graph Array<int> _unsignVert1; // list of unsigned vertices in second graph PtrArray<Array<int>> _unsignVert2; // adjancy status whether vertex in 2 is adjaent to assigned Array<int> _adjStatus; // assign vertex from 1 graph to 2 int* _x; // assign vertex from 2 graph to 1 int* _y; // size of first graph int _n; // size of second graph int _m; protected: // callbacks for sorting list of vertices static int _compareFirstDegree(int& i1, int& i2, void* context); static int _compareSecondDegree(int& i1, int& i2, void* context); private: Greedy(const Greedy&); // no implicit copy }; // Refinement stage: random discrete descent method class RandomDisDec { public: enum { MAX_ITERATION = 1000 }; RandomDisDec(AdjMatricesStore& aj); // main method for refinement stage void refinementStage(); // returns number of unmatched edges int getError() { return _errorNumber; } // sets maximum iteration number limit void setIterationNumber(int max); std::shared_ptr<CancellationHandler> cancellation_handler; private: // returns number of unmatched edges int _goalFunction(); // returns true if there is advantage (minimum error) to do move bool _acceptanceMove(int x); // returns true if there is advantage to do swap operation bool _acceptanceSwap(int x, int y); // creates list of error vertices void _makeLe(); // keeping util for 2DOM AdjMatricesStore& _adjMstore; // assign vertex from 1 graph to 2 int* _x; // assign vertex from 2 graph to 1 int* _y; // error list CP_DECL; TL_CP_DECL(Array<int>, _errorList); // list of error vertrces TL_CP_DECL(Array<int>, _listErrVertices); // size of first graph int _n; // size of second graph int _m; // sum of all errors int _errorNumber; // new state`s sum of all errors int _newErrorNumber; // flag for keeping breaks bool _stop; // max iteration number. Algortihm breaks its work then reached it int _maxIteration; // for stucking override Array<int> _stateArray; // error number in previous stuck state int _errorNumberStuck; // number of iterations before algorithm consider current state as stuck state int _stuckCount; // save current state in case of stuck void _saveState(); // load state void _loadState(); private: RandomDisDec(const RandomDisDec&); // no implicit copy }; // Randomizator for approximate algorithm class DLLEXPORT RandomHandler { public: enum { DEFSEED = 54217137, BIG_PRIME = 899999963 }; RandomHandler(int ijkl) : strand(false) { ranmarin(ijkl % BIG_PRIME); } RandomHandler(long ijkl) : strand(false) { ranmarin(ijkl % BIG_PRIME); } RandomHandler() : strand(false) { ranmarin(DEFSEED); }; void ranmarin(int ijkl) { int ij, kl; int i, ii, j, jj, k, l, m; double s, t; u.resize(97); uvec.resize(97); ij = ijkl / 30082; kl = ijkl - 30082 * ij; i = ((ij / 177) % 177) + 2; j = (ij % 177) + 2; k = ((kl / 169) % 178) + 1; l = kl % 169; for (ii = 0; ii < 97; ++ii) { s = 0.0; t = 0.5; for (jj = 0; jj < 24; ++jj) { m = (((i * j) % 179) * k) % 179; i = j; j = k; k = m; l = (53 * l + 1) % 169; if (((l * m) % 64) >= 32) s += t; t *= 0.5; } u[ii] = s; } c = 362436.0 / 16777216.0; cd = 7654321.0 / 16777216.0; cm = 16777213.0 / 16777216.0; i97 = 96; j97 = 32; } inline double next() { double uni; uni = u[i97] - u[j97]; if (uni < 0.0) uni += 1.0; u[i97] = uni; if (--i97 < 0) i97 = 96; if (--j97 < 0) j97 = 96; c -= cd; if (c < 0.0) c += cm; uni -= c; if (uni < 0.0) uni += 1.0; return uni; } inline int next(int max) { if (strand) return (rand() % max); else return (int)(max * next()); } void next(Array<double>& d) { double uni; int n = d.size(); for (int i = 0; i < n; ++i) { uni = u[i97] - u[j97]; if (uni < 0.0) uni += 1.0; u[i97] = uni; if (--i97 < 0) i97 = 96; if (--j97 < 0) j97 = 96; c -= cd; if (c < 0.0) c += cm; uni -= c; if (uni < 0.0) uni += 1.0; d[i] = uni; } } double c, cd, cm; Array<double> u; Array<double> uvec; int i97, j97; bool strand; }; protected: // keeping graphs Graph* _subgraph; Graph* _supergraph; bool _findTrivialMcs(); void _clearSolutionMaps(); void _addSolutionMap(Array<int>& v_map, Array<int>& e_map); // method returns true if edges with input number completely matched bool _getEdgeColorCondition(Graph& graph1, Graph& graph2, int i, int j) const; // returns all solutions void _getSolutionMaps(int count, ObjArray<Array<int>>& v_maps, ObjArray<Array<int>>& e_maps) const; // array for keeping all solutions. In each subarray element[0] = vertex size, [1] = edge size, and // next '[0]' elements for vertex map, next '[1]' for edge map (in sum 2+vertexEnd()+edgeEnd() elements) ObjArray<Array<int>> _vertEdgeSolMap; RandomHandler _random; private: MaxCommonSubgraph(const MaxCommonSubgraph&); // no implicit copy }; // for searching substructure with map class SubstructureMcs { public: enum { UNMAPPED = -1 }; // constuctors SubstructureMcs(); SubstructureMcs(Graph& sub, Graph& super); virtual ~SubstructureMcs() { } // sets graphs for substructure search considering their size void setGraphs(Graph& sub, Graph& super); // searches substructure for graphs and maps vertices virtual bool searchSubstructure(Array<int>* map); // condition for edge match bool (*cbMatchEdge)(Graph& graph1, Graph& graph2, int i, int j, void* userdata); // condition for vertex match bool (*cbMatchVertex)(Graph& graph1, Graph& graph2, const int* core_sub, int i, int j, void* userdata); void* userdata; // returns true if graphs was swapped then initializing bool isInverted() { return _invert; }; // function for substructure search always return 0 static int _embedding(Graph& subgraph, Graph& supergraph, int* core_sub, int* core_super, void* userdata); protected: // ptrs for input graphs Graph* _sub; Graph* _super; // variable for considering swap input graphs bool _invert; private: SubstructureMcs(const SubstructureMcs&); // no implicit copy }; } // namespace indigo #ifdef _WIN32 #pragma warning(pop) #endif #endif