contrib/champ/champ.c (4,439 lines of code) (raw):
/*
A* -------------------------------------------------------------------
B* This file contains source code for the PyMOL computer program
C* copyright 1998-2000 by Warren Lyford Delano of DeLano Scientific.
D* -------------------------------------------------------------------
E* It is unlawful to modify or remove this copyright notice.
F* -------------------------------------------------------------------
G* Please see the accompanying LICENSE file for further information.
H* -------------------------------------------------------------------
I* Additional authors of this source file include:
-*
-*
-*
Z* -------------------------------------------------------------------
*/
#include"os_python.h"
#include"os_std.h"
#include"os_memory.h"
#include"const.h"
#include"err2.h"
#include"sort.h"
#include"feedback2.h"
#include"mac.h"
#include"vla.h"
#include"champ.h"
#include"strblock.h"
#include"chiral.h"
#define _MATCHDEBUG
#define _MATCHDEBUG2
#define _RINGDEBUG
#define _AROMDEBUG
char *ChampParseAliphaticAtom(CChamp *I,char *c,int atom,int mask,int len,int imp_hyd);
char *ChampParseAromaticAtom(CChamp *I,char *c,int atom,int mask,int len,int imp_hyd);
char *ChampParseStringAtom(CChamp *I,char *c,int atom,int len);
int ChampParseAtomBlock(CChamp *I,char **c_ptr,int cur_atom);
void ChampAtomDump(CChamp *I,int index);
void ChampAtomFlagDump(CChamp *I,int index);
int ChampAddBondToAtom(CChamp *I,int atom_index,int bond_index);
int ChampMatch(CChamp *I,int template,int target,
int unique_start,int n_wanted,int *match_start,int tag_mode);
int ChampMatch2(CChamp *I,int template,int target,
int start_tmpl,int start_targ,
int n_wanted,int *match_start,int tag_mode);
int ChampFindUniqueStart(CChamp *I,int template,int target,int *multiplicity);
int ChampUniqueListNew(CChamp *I,int atom, int unique_list);
void ChampUniqueListFree(CChamp *I,int unique_list);
void ChampMatchDump(CChamp *I,int match_idx);
void ChampMatchFree(CChamp *I,int match_idx);
void ChampMatchFreeChain(CChamp *I,int match_idx);
void ChampCountRings(CChamp *I,int index);
void ChampPrepareTarget(CChamp *I,int index);
void ChampPreparePattern(CChamp *I,int index);
char *ChampParseBlockAtom(CChamp *I,char *c,int atom,int mask,int len,int not_flag);
void ChampCountBondsEtc(CChamp *I,int index);
void ChampCheckCharge(CChamp *I,int index);
char *ChampParseTag(CChamp *I,char *c,unsigned int *map,unsigned int *not_map,int *ok);
static int num_to_ring[12] = {
0,
0,
0,
cH_Ring3,
cH_Ring4,
cH_Ring5,
cH_Ring6,
cH_Ring7,
cH_Ring8,
0,
0,
0,};
static int num_to_valence[9] = {
cH_0Valence,
cH_1Valence,
cH_2Valence,
cH_3Valence,
cH_4Valence,
cH_5Valence,
cH_6Valence,
cH_7Valence,
cH_8Valence };
static int num_to_degree[9] = {
cH_0Bond,
cH_1Bond,
cH_2Bond,
cH_3Bond,
cH_4Bond,
cH_5Bond,
cH_6Bond,
cH_7Bond,
cH_8Bond
};
#ifdef _HAPPY_UT
static void diff(char *p,char *q)
{
int same=true;
char *s1,*s2;
s1=p;
s2=q;
while((*p)&&(*q)) {
if(*p!=*q)
if(!((((*p)>='0')&&((*p)<='9'))&&
(((*q)>='0')&&((*q)<='9'))))
{
same=false;
break;
}
q++;
p++;
}
if(!same)
printf("%s\n%s\n",s1,s2);
}
int main(int argc, char *argv[])
{
CChamp *h;
int p;
int pp = 0;
char *smi;
char buffer[1024];
FILE *f,*g;
int a,b,c=0;
int len;
int pat_list=0;
int idx_list=0;
int n_mat;
int match_start;
FeedbackInit();
switch(5) {
case 5: /* N-way cross comparison */
h = ChampNew();
f = fopen("test/ref/test01.smi","r");
while(!feof(f)) {
buffer[0]=0;
fgets(buffer,1024,f);
len = strlen(buffer);
if(len) {
if(buffer[len-1]<' ') buffer[len-1]=0;
p = ChampSmiToPat(h,buffer);
if(p) {
ChampPreparePattern(h,p);
a = h->Pat[p].unique_atom;
printf("%d %s %d %d\n",p,buffer,a,ListLen(h->Int3,a));
h->Pat[p].link = pat_list;
pat_list = p;
} else {
printf("error %s\n",buffer);
}
c++;
if(c==5000)
break;
}
}
fclose(f);
c = 0;
/* we now have a set of patterns loaded... */
pp = pat_list;
while(pp) {
if(!h->Pat[pp].link)
break;
p = pat_list;
n_mat = 0;
while(p) {
if(!h->Pat[p].link)
break;
n_mat += ChampMatch(h,pp,p,
ChampFindUniqueStart(h,pp,p,NULL),1,NULL);
p = h->Pat[p].link;
}
c++;
smi = ChampPatToSmiVLA(h,pp,NULL);
printf("%d %d %s\n",c,n_mat,smi);
pp = h->Pat[pp].link;
vla_free(smi);
}
ChampFree(h);
break;
case 4: /* 1-way cross comparison */
h = ChampNew();
f = fopen("test/ref/test01.smi","r");
while(!feof(f)) {
buffer[0]=0;
fgets(buffer,1024,f);
len = strlen(buffer);
if(len) {
if(buffer[len-1]<' ') buffer[len-1]=0;
p = ChampSmiToPat(h,buffer);
if(p&&(!pp)) {
ChampPreparePattern(h,p);
pp = p;
printf("%s\n",buffer);
} else if(p) {
/* locate unique atoms */
ChampPreparePattern(h,p);
b = ChampFindUniqueStart(h,pp,p,NULL);
match_start = 0;
n_mat = ChampMatch(h,pp,p,b,100,&match_start);
/* ChampPatDump(I,template);
ChampMatchDump(I,match_start);*/
ChampMatchFreeChain(h,match_start);
if(n_mat) {
printf("%d %d %d %d %d %d %s\n",c,n_mat,pp,p,b,h->Int3[b].value[0],buffer);
}
ChampPatFree(h,p);
} else {
printf("error %s\n",buffer);
}
c++;
if(0&&(c==1000))
break;
}
}
fclose(f);
/* we now have a set of patterns loaded... */
ChampFree(h);
break;
case 3: /* self comparison */
h = ChampNew();
f = fopen("test/ref/test01.smi","r");
while(!feof(f)) {
buffer[0]=0;
fgets(buffer,1024,f);
len = strlen(buffer);
if(len) {
if(buffer[len-1]<' ') buffer[len-1]=0;
p = ChampSmiToPat(h,buffer);
if(p) {
/* locate unique atoms */
ChampPreparePattern(h,p);
b = ChampFindUniqueStart(h,p,p,NULL);
match_start=0;
n_mat = ChampMatch(h,p,p,b,100,&match_start);
if(!n_mat) {
printf("Error!");
exit(0);
} else {
printf("%d %d %s\n",c,n_mat,buffer);
}
ChampMatchFreeChain(h,match_start);
ChampPatFree(h,p);
} else {
printf("error %s\n",buffer);
}
c++;
if(0&&(c==10))
break;
}
}
fclose(f);
/* we now have a set of patterns loaded... */
ChampFree(h);
break;
case 2: /* unique atoms */
h = ChampNew();
f = fopen("test/ref/test01.smi","r");
while(!feof(f)) {
buffer[0]=0;
fgets(buffer,1024,f);
len = strlen(buffer);
if(len) {
if(buffer[len-1]<' ') buffer[len-1]=0;
p = ChampSmiToPat(h,buffer);
if(p) {
ChampPrepareUnique(h,p);
a = h->Pat[p].unique_atom;
printf("%d %s %d %d\n",p,buffer,a,ListLen(h->Int3,a));
h->Pat[p].link = pat_list;
pat_list = p;
} else {
printf("error %s\n",buffer);
}
c++;
if(c==10)
break;
}
}
fclose(f);
/* we now have a set of patterns loaded... */
p = pat_list;
while(p) {
if(!h->Pat[p].link)
break;
b = ChampFindUniqueStart(h,p,h->Pat[p].link,&c);
printf("\n%d vs %d, unique %d (mult %d)\n",p,h->Pat[p].link,b,c);
idx_list = h->Int3[b].value[2];
while(idx_list) {
ChampAtomToString(h,h->Int[idx_list].value,buffer);
printf("atom %s\n",buffer);
idx_list = h->Int[idx_list].link;
}
p = h->Pat[p].link;
}
ChampFree(h);
break;
case 1: /* read & write smiles */
/* FeedbackEnable(FB_smiles_parsing,FB_everything);
FeedbackEnable(FB_smiles_creation,FB_everything);*/
h = ChampNew();
f = fopen("test/ref/test01.smi","r");
g = fopen("test/cmp/test01.smi","w");
while(!feof(f)) {
buffer[0]=0;
fgets(buffer,1024,f);
len = strlen(buffer);
if(len) {
if(buffer[len-1]<' ')
buffer[len-1]=0;
p = ChampSmiToPat(h,buffer);
if(p) {
/* ChampPatDump(h,p);*/
smi = ChampPatToSmiVLA(h,p,NULL);
fprintf(g,"%s\n",smi);
diff(buffer,smi);
vla_free(smi);
ChampPatFree(h,p);
} else {
printf("error %s\n",buffer);
}
if(!(c&0xFFF))
printf(" %d\n",c);
c++;
}
}
fclose(f);
fclose(g);
ChampFree(h);
break;
}
FeedbackFree();
os_memory_dump();
return 0;
}
#endif
char *ChampParseTag(CChamp *I,char *c,unsigned int *map,unsigned int *not_map,int *ok)
{
/* parse bit masks like <1> <1,2,3> <12,3,1> etc... */
int map_mask;
int map_index;
int not_flag = false;
while(*ok) {
if((*c)=='>') {
c++;
break;
}
if(!c) {
*ok=false;
break;
}
if(*c==';') {
not_flag=false;
c++;
} else if(*c=='!') {
not_flag=true;
c++;
} else if((*c>='0')&&(*c<='9')) {
if((*(c+1)>='0')&&(*(c+1)<='9')) {
map_index = (*c-'0')*10+(*(c+1)-'0');
c+=2;
} else {
map_index = (*c-'0');
c++;
}
map_mask = 0x1;
while(map_index) {
map_mask = (map_mask<<1);
map_index--;
}
if(not_flag) {
*not_map|=map_mask;
} else {
*map|=map_mask;
}
} else
c++;
}
return(c);
}
static void merge_lineages(CChamp *I,int *src,int *src_mask,int *dst,int *dst_mask)
{
int i;
int i_src;
int i_dst;
i_src = *src;
i_dst = *dst;
while(i_src) {
i = I->Int[i_src].value;
if(!dst_mask[i]) {
dst_mask[i]=1;
*dst = ListElemPushInt(&I->Int,*dst,i);
}
i_src = I->Int[i_src].link;
}
while(i_dst) {
i = I->Int[i_dst].value;
if(!src_mask[i]) {
src_mask[i]=1;
*src = ListElemPushInt(&I->Int,*src,i);
}
i_dst = I->Int[i_dst].link;
}
}
static int combine_lineage(CChamp *I,int src,int dst,int *lin_mask)
{
int i;
while(src) {
i = I->Int[src].value;
if(!lin_mask[i]) {
lin_mask[i]=1;
dst = ListElemPushInt(&I->Int,dst,i);
}
src = I->Int[src].link;
}
return(dst);
}
static int unrelated_lineage(CChamp *I,int index,int *lin_mask)
{
int result = true;
#ifdef RINGDEBUG
printf("%d %d %d %d %d\n",
lin_mask[0],
lin_mask[1],
lin_mask[2],
lin_mask[3],
lin_mask[4]);
#endif
while(index) {
#ifdef RINGDEBUG
printf(" lineage: %d %d\n",I->Int[index].value,lin_mask[I->Int[index].value]);
#endif
if(lin_mask[I->Int[index].value]) {
result=false;
break;
}
index = I->Int[index].link;
}
return result;
}
void ChampCountRings(CChamp *I,int index)
{
ListPat *pat;
ListBond *bd,*bd0,*bd1,*bd2,*bd3;
ListAtom *at,*at0,*at1,*at2,*at3,*at4;
int a,a0,a1,ai,bi,ai0,ai1,ai2,a2,bd_a0,bd_a1,ai3,ai4;
int i2,i0,i1,i3;
int bi0,bi1,bi2,bi3;
int ni1;
int n_atom = 0;
int ring_size;
pat = I->Pat + index;
ai = pat->atom;
while(ai) { /* count number of atoms */
n_atom++;
at = I->Atom + ai;
ai = at->link;
}
if(n_atom) {
ListAtom **atom = NULL;
int *atom_idx;
int *bonds = NULL;
int *neighbors = NULL;
int lin_mask_size;
int **lin_mask_vla = NULL;
int *lin_mask;
int lin_index;
int n_expand;
/* create a packed array of the atom pointers -- these will be our
atom indices for this routine */
atom = mac_malloc_array(ListAtom*,n_atom);
atom_idx = mac_malloc_array(int,n_atom);
a = 0;
ai = pat->atom;
while(ai) { /* load array and provide back-reference in mark_targ */
at = I->Atom + ai;
at->cycle = 0; /* initialize */
at->class = 0;
atom[a] = at;
atom_idx[a] = ai;
at->mark_targ = a; /* used for back-link */
at->mark_read = 0; /* used to mark exclusion */
at->mark_tmpl = 0; /* used to mark as dirty */
at->first_targ = 0; /* used for lineage list */
at->first_tmpl = 0; /* user for lineage mask index */
at->first_base = 0; /* used for storing branches */
ai = at->link;
a++;
}
/* build an efficient neighbor/bond list for all atoms */
bonds = os_calloc(n_atom,sizeof(int)); /* list-starts of global bonds indices */
neighbors = os_calloc(n_atom,sizeof(int)); /* list-starts of local atom indices */
bi = pat->bond;
while(bi) {
bd = I->Bond + bi;
bd->cycle = 0; /* initialize */
bd->class = 0;
ai0 = bd->atom[0]; /* global index */
at0 = I->Atom+ai0;
a0 = at0->mark_targ; /* local */
ai1 = bd->atom[1];
at1 = I->Atom+ai1;
a1 = at1->mark_targ;
if((bd->order)&(cH_Double|cH_Triple)) { /* record bond & atoms with Pi bonds */
at0->class |= cH_Pi;
at1->class |= cH_Pi;
bd->class |= cH_Pi;
}
bonds[a0] = ListElemPushInt(&I->Int,bonds[a0],bi); /* enter this bond */
bonds[a1] = ListElemPushInt(&I->Int,bonds[a1],bi);
neighbors[a0] = ListElemPushInt(&I->Int,neighbors[a0],a1); /* cross-enter neighbors */
#ifdef RINGDEBUG
printf(" ring: neighbor of %d is %d (%d)\n",a0,a1,neighbors[a0]);
#endif
neighbors[a1] = ListElemPushInt(&I->Int,neighbors[a1],a0);
#ifdef RINGDEBUG
printf(" ring: neighbor of %d is %d (%d)\n",a1,a0,neighbors[a1]);
#endif
bi = bd->link;
}
/* set up data structures we'll need for ring finding */
lin_mask_size = sizeof(int)*n_atom;
lin_mask_vla = (int**)VLAMalloc(100,lin_mask_size,5,1); /* auto-zero */
/* okay, repeat the ring finding process for each atom in the molecule
(optimize later to exclude dead-ends) */
for(a=0;a<n_atom;a++) {
int expand;
int clean_up;
int sz;
int next;
lin_index = 1;
#ifdef RINGDEBUG
printf("==========\n");
printf(" ring: starting with atom %d\n",a);
ChampMemoryDump(I);
#endif
expand = 0;
clean_up = 0;
sz = 1;
at = atom[a];
clean_up = ListElemPushInt(&I->Int,clean_up,a); /* mark this atom as dirty */
at->mark_tmpl = true;
at->mark_read = true; /* exclude this atom */
expand = ListElemPushInt(&I->Int,expand,a); /* store this atom for the expansion cycle */
vla_check(lin_mask_vla,int*,lin_index); /* assign blank lineage for this atom */
at->first_tmpl = lin_index;
lin_index++;
while(sz<(RING_SEARCH_CUTOFF+1)) { /* while the potential loop is small...*/
next = 0; /* start list of atoms for growth pass */
/* find odd cycles and note growth opportunies */
while(expand) { /* iterate over each atom to expand */
expand = ListElemPopInt(I->Int,expand,&a0);
#ifdef RINGDEBUG
printf(" ring: expand sz %d a0 %d\n",sz,a0);
#endif
ni1 = neighbors[a0];
while(ni1) { /* iterate over each neighbor of that atom */
#ifdef RINGDEBUG
printf(" ring: ni1 %d\n",ni1);
#endif
ni1 = ListElemGetInt(I->Int,ni1,&a1);
#ifdef RINGDEBUG
printf(" ring: neighbor of %d is %d\n",a0,a1);
printf(" ring: neighbor %d mark_read %d\n",a1,atom[a1]->mark_read);
#endif
if(!atom[a1]->mark_read) { /* if that neighbor hasn't yet been covered... */
if(!atom[a1]->first_base) {
next = ListElemPushInt(&I->Int,next,a1); /* store it for growth pass */
}
atom[a1]->first_base = /* and record this entry to it */
ListElemPushInt(&I->Int,atom[a1]->first_base,a0);
if(!atom[a1]->mark_tmpl) {/* mark for later clean_up */
clean_up = ListElemPushInt(&I->Int,clean_up,a1);
atom[a1]->mark_tmpl = true;
}
} else if(sz==atom[a1]->mark_read) { /* ...or if the neighbor has an equal exclusion level... */
#ifdef RINGDEBUG
printf(" ring: checking lineage between %d %d\n",a0,a1);
#endif
if(unrelated_lineage(I,atom[a0]->first_targ, /* unrelated? */
(int*)(((char*)lin_mask_vla)+(atom[a1]->first_tmpl*lin_mask_size))))
{
ring_size = sz*2-1;
#ifdef RINGDEBUG
printf(" ring: #### found odd cycle %d\n",ring_size);
#endif
/* then we have a cycle of size sz*2-1 (odd) */
at0 = atom[a0];
at1 = atom[a1];
merge_lineages(I,
&at0->first_targ,
(int*)(((char*)lin_mask_vla)+(at0->first_tmpl*lin_mask_size)),
&at1->first_targ,
(int*)(((char*)lin_mask_vla)+(at1->first_tmpl*lin_mask_size)));
/* set the atom bits appropriately */
at0->cycle|=num_to_ring[ring_size];
at1->cycle|=num_to_ring[ring_size];
i0 = bonds[a0];
ai0 = atom_idx[a0];
ai1 = atom_idx[a1];
while(i0) {
i0 = ListElemGetInt(I->Int,i0,&bi);
bd = I->Bond+bi;
bd_a0=bd->atom[0];
bd_a1=bd->atom[1];
if(((bd_a0==ai0)&&(bd_a1==ai1))||
((bd_a1==ai0)&&(bd_a0==ai1)))
bd->cycle|=num_to_ring[ring_size];
}
}
}
}
}
/* find even cycles and grow pass */
n_expand = 0;
while(next) { /* iterate over potential branches */
next = ListElemPopInt(I->Int,next,&a1);
#ifdef RINGDEBUG
printf(" ring: next %d\n",a1);
#endif
at1 = atom[a1];
/* see if the base atoms form distinct cycles */
i0 = atom[a1]->first_base;
while(i0) {
a0 = I->Int[i0].value;
i2 = I->Int[i0].link;
while(i2) {
a2 = I->Int[i2].value;
if(unrelated_lineage(I,atom[a0]->first_targ,
(int*)(((char*)lin_mask_vla)+(atom[a2]->first_tmpl*lin_mask_size))))
{
ring_size = sz*2;
at0 = atom[a0];
at2 = atom[a2];
#ifdef RINGDEBUG
printf(" ring: #### found even cycle %d %d %d\n",ring_size,i0,i2);
#endif
at0->cycle|=num_to_ring[ring_size];
at1->cycle|=num_to_ring[ring_size];
at2->cycle|=num_to_ring[ring_size];
i1 = bonds[a1];
ai0 = atom_idx[a0];
ai1 = atom_idx[a1];
ai2 = atom_idx[a2];
while(i1) {
i1 = ListElemGetInt(I->Int,i1,&bi);
bd = I->Bond+bi;
bd_a0=bd->atom[0];
bd_a1=bd->atom[1];
if(((bd_a0==ai0)&&(bd_a1==ai1))||
((bd_a1==ai0)&&(bd_a0==ai1))||
((bd_a0==ai2)&&(bd_a1==ai1))||
((bd_a1==ai2)&&(bd_a0==ai1)))
bd->cycle|=num_to_ring[ring_size];
}
/* we have a cycle of size sz*2 */
/* set atom and bond bits appropriately */
}
i2 = I->Int[i2].link;
}
i0 = I->Int[i0].link;
}
/* allocate a new lineage for this branch atom */
vla_check(lin_mask_vla,int*,lin_index);
at1->first_tmpl = lin_index;
lin_mask = (int*)(((char*)lin_mask_vla)+(lin_mask_size*at1->first_tmpl));
lin_index++;
/* extend this particular lineage with each linking atom's lineage*/
i0 = atom[a1]->first_base;
a0 = I->Int[i0].value;
if(a0!=a) {
lin_mask[a0] = 1; /* add base atom */
at1->first_targ = ListElemPushInt(&I->Int,at1->first_targ,a0);
}
while(i0) { /* and lineage of base atom */
a0 = I->Int[i0].value;
at0 = atom[a0];
at1->first_targ = combine_lineage(I,at0->first_targ,at1->first_targ,lin_mask);
i0 = I->Int[i0].link;
}
expand = ListElemPushInt(&I->Int,expand,a1);
n_expand++;
at1->mark_read = sz+1;
if(!at1->mark_tmpl) {/* mark for later clean_up */
clean_up = ListElemPushInt(&I->Int,clean_up,a1);
at1->mark_tmpl = 1;
}
/* clean up */
ListElemFreeChain(I->Int,atom[a1]->first_base); /* free base atom list */
atom[a1]->first_base = 0;
/* copy the lineage to the new atom */
}
sz++;
if(n_expand<2) {/* on a uniconnected branch, so no point in continuing */
break;
}
}
/* clean-up expansion list */
ListElemFreeChain(I->Int,expand);
expand = 0;
while(clean_up) {
clean_up=ListElemPopInt(I->Int,clean_up,&a0);
at = atom[a0];
/* reset lineage list and mask */
lin_mask = (int*)(((char*)lin_mask_vla)+ (lin_mask_size*at->first_tmpl));
while(at->first_targ) {
at->first_targ = ListElemPopInt(I->Int, at->first_targ,&a1);
lin_mask[a1] = 0;
}
#ifdef RINGDEBUG
for(i0=0;i0<n_atom;i0++) {
if(lin_mask[i0])
printf("lineage mask unclean !\n");
}
#endif
at->mark_read = 0; /* used to mark exclusion */
at->mark_tmpl = 0; /* used to mark as dirty */
at->first_targ = 0; /* used for lineage list */
at->first_tmpl = 0; /* user for lineage mask index */
at->first_base = 0; /* used for storing branches */
}
}
#ifdef RINGDEBUG
for(a=0;a<n_atom;a++) {
i1 = bonds[a];
while(i1) { /* bonds of atom 1 */
i1 = ListElemGetInt(I->Int,i1,&bi1);
bd1 = I->Bond + bi1;
ai1 = bd1->atom[0];
ai2 = bd1->atom[1];
at1 = I->Atom + ai1;
at2 = I->Atom + ai2;
printf("%d %d %x\n",at1->mark_targ+1,at2->mark_targ+1,bd1->cycle);
}
}
#endif
/* now determine which atoms/bonds are aromatic */
/* the following substructures are considered "aromatic" if they occur in rings of 5 or 6
* *=*-*=*
* *=*-*-*=*
* NOTE this is not a chemically accurate definition of aromaticity, just one
* that is easy to program, and which covers most common occurences -- an 80-90% soln.
*
* I'll figure out something better later on...
*/
ai0 = pat->atom;
while(ai0) { /* cycle through each atom */
at0 = I->Atom + ai0;
if(at0->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 0 is cyclic */
i0 = bonds[at0->mark_targ];
while(i0) { /* bonds of atom 0 */
i0 = ListElemGetInt(I->Int,i0,&bi0);
bd0 = I->Bond + bi0;
if((bd0->order==cH_Double)&&(bd0->cycle&(cH_Ring4|cH_Ring5|cH_Ring6))) {
/* bond 0 is double */
ai1 = bd0->atom[0];
if(ai0==ai1) ai1 = bd0->atom[1];
at1 = I->Atom + ai1;
if(at1->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 1 is cyclic */
i1 = bonds[at1->mark_targ];
while(i1) { /* bonds of atom 1 */
i1 = ListElemGetInt(I->Int,i1,&bi1);
bd1 = I->Bond + bi1;
if((bd1->order==cH_Single)&&(bd1->cycle&(cH_Ring4|cH_Ring5|cH_Ring6))) {
/* bond 1 is single */
ai2 = bd1->atom[0];
if(ai1==ai2) ai2 = bd1->atom[1];
at2 = I->Atom + ai2;
if(at2->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 2 is cyclic */
i2 = bonds[at2->mark_targ];
while(i2) {
i2 = ListElemGetInt(I->Int,i2,&bi2);
bd2 = I->Bond + bi2;
if((bd2->order==cH_Double)&&(bd2->cycle&(cH_Ring4|cH_Ring5|cH_Ring6))) {
/* bond 2 is double */
ai3 = bd2->atom[0];
if(ai2==ai3) ai3 = bd2->atom[1];
at3 = I->Atom + ai3;
if(at3->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 3 is cyclic, therefore system is aromatic */
at3->class|=cH_Aromatic|cH_Pi;
at2->class|=cH_Aromatic|cH_Pi;
at1->class|=cH_Aromatic|cH_Pi;
at0->class|=cH_Aromatic|cH_Pi;
bd2->class|=cH_Aromatic|cH_Pi;
bd1->class|=cH_Aromatic|cH_Pi;
bd0->class|=cH_Aromatic|cH_Pi;
}
}
}
i2 = bonds[at2->mark_targ];
while(i2) {
i2 = ListElemGetInt(I->Int,i2,&bi2);
bd2 = I->Bond + bi2;
if((bd2->order==cH_Single)&&(bd2->cycle&(cH_Ring4|cH_Ring5|cH_Ring6))) {
/* bond 2 is single */
ai3 = bd2->atom[0];
if(ai2==ai3) ai3 = bd2->atom[1];
if(ai3!=ai1) {
/* avoid backtracking... */
at3 = I->Atom + ai3;
if(at3->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 3 is cyclic */
i3 = bonds[at3->mark_targ];
while(i3) {
i3 = ListElemGetInt(I->Int,i3,&bi3);
bd3 = I->Bond + bi3;
if((bd3->order==cH_Double)&&(bd3->cycle&(cH_Ring4|cH_Ring5|cH_Ring6))) {
/* bond 3 is double */
ai4 = bd3->atom[0];
if(ai3==ai4) ai4 = bd3->atom[1];
at4 = I->Atom + ai4;
if(at4->cycle&(cH_Ring4|cH_Ring5|cH_Ring6)) {
/* atom 4 is cyclic, therefore system is aromatic */
at4->class|=cH_Aromatic|cH_Pi;
at3->class|=cH_Aromatic|cH_Pi;
at2->class|=cH_Aromatic|cH_Pi;
at1->class|=cH_Aromatic|cH_Pi;
at0->class|=cH_Aromatic|cH_Pi;
bd3->class|=cH_Aromatic|cH_Pi;
bd2->class|=cH_Aromatic|cH_Pi;
bd1->class|=cH_Aromatic|cH_Pi;
bd0->class|=cH_Aromatic|cH_Pi;
}
}
}
}
}
}
}
}
}
}
}
}
}
}
ai0 = at0->link;
}
for(a=0;a<n_atom;a++) { /* default conclusions */
at = atom[a];
if(!at->cycle)
at->cycle=cH_Acyclic;
if(!(at->class&(cH_Aromatic|cH_Aliphatic)))
at->class|=cH_Aliphatic;
}
bi = pat->bond;
while(bi) {
bd = I->Bond + bi;
if(!bd->cycle)
bd->cycle=cH_Acyclic;
if(!(bd->class&(cH_Aromatic|cH_Aliphatic)))
bd->class=cH_Aliphatic;
bi = bd->link;
}
/* now, clean up our efficient bond and neighbor lists */
for(a=0;a<n_atom;a++) {
ListElemFreeChain(I->Int,neighbors[a]);
ListElemFreeChain(I->Int,bonds[a]);
}
mac_free(neighbors);
mac_free(atom);
mac_free(atom_idx);
mac_free(bonds);
/* and free the other data structures */
vla_free(lin_mask_vla);
}
#ifdef RINGDEBUG
ChampPatDump(I,index);
#endif
}
void ChampCountBondsEtc(CChamp *I,int index)
{
ListPat *pat;
ListAtom *at,*at0,*at1;
int ai,ai0,ai1,bi;
ListBond *bd;
int val_adj;
pat = I->Pat + index;
/* initialize */
ai = pat->atom;
while(ai) {
at = I->Atom + ai;
at->valence=0;
at->degree=0;
at->tot_hydro=0;
ai = at->link;
}
/* count */
bi = pat->bond;
while(bi) {
bd = I->Bond + bi;
ai0 = bd->atom[0];
ai1 = bd->atom[1];
at0 = I->Atom + ai0;
at1 = I->Atom + ai1;
at0->degree++;
at1->degree++;
if(at0->atom&cH_H) at1->tot_hydro++;
if(at1->atom&cH_H) at0->tot_hydro++;
switch(bd->order) {
case cH_Single:
at0->valence++;
at1->valence++;
break;
case cH_Double:
at0->valence+=2;
at1->valence+=2;
break;
case cH_Triple:
at0->valence+=3;
at1->valence+=3;
break;
}
bi = bd->link;
}
/* convert to bit masks */
ai = pat->atom;
while(ai) {
at = I->Atom + ai;
at->degree = num_to_degree[at->degree];
if(at->comp_imp_hydro_flag) {
at->imp_hydro = 0;
switch(at->charge) { /* adjust effective valence w.r.t charge */
case cH_Neutral: val_adj=0; break;
default: val_adj=0; break;
case cH_Cation: val_adj=-1; break;
case cH_Dication: val_adj=-2; break;
case cH_Anion: val_adj=1; break;
case cH_Dianion: val_adj=2; break;
}
val_adj += at->valence;
switch(at->atom) { /* now compute implicit hydrogens */
case cH_B:
if(val_adj<2)
at->imp_hydro = 2 - val_adj;
break;
case cH_C:
if(val_adj<4)
at->imp_hydro = 4 - val_adj;
break;
case cH_N:
if(val_adj<3)
at->imp_hydro = 3 - val_adj;
else if(val_adj<5)
at->imp_hydro = 5 - val_adj;
break;
case cH_O:
if(val_adj<2)
at->imp_hydro = 2 - val_adj;
break;
case cH_S:
if(val_adj<2)
at->imp_hydro = 2 - val_adj;
else if(val_adj<4)
at->imp_hydro = 4 - val_adj;
else if(val_adj<6)
at->imp_hydro = 6 - val_adj;
break;
case cH_P:
if(val_adj<3)
at->imp_hydro = 3 - val_adj;
else if(val_adj<5)
at->imp_hydro = 5 - val_adj;
break;
case cH_F:
case cH_Cl:
case cH_Br:
case cH_I:
if(val_adj<1)
at->imp_hydro = 1 - val_adj;
break;
}
at->valence+=at->imp_hydro; /* compute total valence */
}
at->tot_hydro+=at->imp_hydro;
at->hydro_flag = true; /* we have a valid hydrogen count... */
at->valence=num_to_valence[at->valence];
ai = at->link;
}
}
void ChampCheckCharge(CChamp *I,int index)
{
ListPat *pat;
ListAtom *at;
int ai;
pat = I->Pat + index;
ai = pat->atom;
while(ai) {
at = I->Atom + ai;
if(!at->charge) {
at->charge=cH_Neutral;
}
ai = at->link;
}
}
void ChampPrepareTarget(CChamp *I,int index)
{
ListPat *pat;
pat = I->Pat + index; /* NOTE: assumes I->Pat is stationary! */
if(!pat->target_prep) {
pat->target_prep=1;
ChampCountRings(I,index);
ChampCountBondsEtc(I,index);
ChampCheckCharge(I,index);
if(pat->unique_atom)
ChampUniqueListFree(I,pat->unique_atom);
pat->unique_atom = ChampUniqueListNew(I,pat->atom,0);
}
}
void ChampPreparePattern(CChamp *I,int index)
{
ListPat *pat;
pat = I->Pat + index; /* NOTE: assumes I->Pat is stationary! */
if(!pat->unique_atom)
pat->unique_atom = ChampUniqueListNew(I,pat->atom,0);
}
static int PTruthCallStr(PyObject *object,char *method,char *argument)
{
int result = false;
PyObject *tmp;
tmp = PyObject_CallMethod(object,method,"s",argument);
if(tmp) {
if(PyObject_IsTrue(tmp))
result = 1;
Py_DECREF(tmp);
}
return(result);
}
static int PConvPyObjectToInt(PyObject *object,int *value)
{
int result = true;
PyObject *tmp;
if(!object)
result=false;
else if(PyInt_Check(object)) {
(*value) = (int)PyInt_AsLong(object);
} else {
tmp = PyNumber_Int(object);
if(tmp) {
(*value) = (int)PyInt_AsLong(tmp);
Py_DECREF(tmp);
} else
result=false;
}
return(result);
}
static void UtilCleanStr(char *s) /*remove flanking white and all unprintables*/{
char *p,*q;
p=s;
q=s;
while(*p)
if(*p>32)
break;
else
p++;
while(*p)
if(*p>=32)
(*q++)=(*p++);
else
p++;
*q=0;
while(q>=s)
{
if(*q>32)
break;
else
{
(*q)=0;
q--;
}
}
}
static int PConvPyObjectToStrMaxClean(PyObject *object,char *value,int ln)
{
char *st;
PyObject *tmp;
int result=true;
if(!object)
result=false;
else if(PyString_Check(object)) {
st = PyString_AsString(object);
strncpy(value,st,ln);
} else {
tmp = PyObject_Str(object);
if(tmp) {
st = PyString_AsString(tmp);
strncpy(value,st,ln);
Py_DECREF(tmp);
} else
result=0;
}
if(ln>0)
value[ln]=0;
else
value[0]=0;
UtilCleanStr(value);
return(result);
}
/*static int PConvPyObjectToStrMaxLen(PyObject *object,char *value,int ln)
{
char *st;
PyObject *tmp;
int result=true;
if(!object)
result=false;
else if(PyString_Check(object)) {
st = PyString_AsString(object);
strncpy(value,st,ln);
} else {
tmp = PyObject_Str(object);
if(tmp) {
st = PyString_AsString(tmp);
strncpy(value,st,ln);
Py_DECREF(tmp);
} else
result=0;
}
if(ln>0)
value[ln]=0;
else
value[0]=0;
return(result);
}
static int PConvAttrToStrMaxLen(PyObject *obj,char *attr,char *str,int ll)
{
int ok=true;
PyObject *tmp;
if(PyObject_HasAttrString(obj,attr)) {
tmp = PyObject_GetAttrString(obj,attr);
ok = PConvPyObjectToStrMaxLen(tmp,str,ll);
Py_DECREF(tmp);
} else {
ok=false;
}
return(ok);
}
*/
static int PConvPyListToFloatArrayInPlace(PyObject *obj,float *ff,int ll)
{
int ok = true;
int a,l;
if(!obj) {
ok=false;
} else if(!PyList_Check(obj)) {
ok=false;
} else {
l=PyList_Size(obj);
if (l!=ll)
ok=false;
else {
if(!l)
ok=-1;
else
ok=l;
for(a=0;a<l;a++)
*(ff++) = (float)PyFloat_AsDouble(PyList_GetItem(obj,a));
}
/* NOTE ASSUMPTION! */
}
return(ok);
}
int ChampModelToPat(CChamp *I,PyObject *model)
{
int nAtom,nBond;
int a;
int ok=true;
int cur_atom=0,last_atom = 0;
ListAtom *at;
int cur_bond=0,last_bond = 0;
ListBond *bd;
int charge=0,order=1;
int result = 0;
int atom1,atom2;
int *atom_index = NULL;
int std_flag;
char *c;
PyObject *atomList = NULL;
PyObject *bondList = NULL;
PyObject *molec = NULL;
PyObject *atom = NULL;
PyObject *bnd = NULL;
PyObject *index = NULL;
PyObject *tmp = NULL;
nAtom=0;
nBond=0;
atomList = PyObject_GetAttrString(model,"atom");
if(atomList)
nAtom = PyList_Size(atomList);
else
ok=err_message("ChampModel2Pat","can't get atom list");
atom_index = mac_malloc_array(int,nAtom);
if(ok) {
for(a=nAtom-1;a>=0;a--) /* reverse order */
{
atom = PyList_GetItem(atomList,a);
if(!atom)
ok=err_message("ChampModel2Pat","can't get atom");
cur_atom = ListElemNewZero(&I->Atom);
at = I->Atom + cur_atom;
at->link = last_atom;
last_atom = cur_atom;
at->chempy_atom = atom;
Py_INCREF(at->chempy_atom);
atom_index[a] = cur_atom; /* for bonds */
if(ok) {
tmp = PyObject_GetAttrString(atom,"name");
if (tmp)
ok = PConvPyObjectToStrMaxClean(tmp,at->name,NAM_SIZE-1);
if(!ok)
err_message("ChampModel2Pat","can't read name");
Py_XDECREF(tmp);
}
if(ok) {
if(PTruthCallStr(atom,"has","flags")) {
tmp = PyObject_GetAttrString(atom,"flags");
if (tmp)
ok = PConvPyObjectToInt(tmp,(int*)&at->tag);
if(!ok)
err_message("ChampModel2Pat","can't read flags");
Py_XDECREF(tmp);
} else {
at->tag = 0;
}
}
if(ok) {
if(PTruthCallStr(atom,"has","index")) { /* note -- chempy models have 1-based index attributes
even though the arrays are zero-based */
tmp = PyObject_GetAttrString(atom,"index");
if (tmp)
ok = PConvPyObjectToInt(tmp,(int*)&at->ext_index);
if(!ok)
err_message("ChampModel2Pat","can't read index");
Py_XDECREF(tmp);
} else {
at->index = 0;
}
}
if(ok) {
if(PTruthCallStr(atom,"has","coord")) {
tmp = PyObject_GetAttrString(atom,"coord");
if (tmp)
ok = PConvPyListToFloatArrayInPlace(tmp,at->coord,3);
if(!ok)
err_message("ChampModel2Pat","can't read coordinates");
Py_XDECREF(tmp);
}
}
if(ok) {
if(PTruthCallStr(atom,"has","formal_charge")) {
tmp = PyObject_GetAttrString(atom,"formal_charge");
if (tmp)
ok = PConvPyObjectToInt(tmp,&charge);
if(!ok)
err_message("ChampModel2Pat","can't read formal_charge");
Py_XDECREF(tmp);
} else {
charge = 0;
}
switch(charge) {
case 0: at->charge = cH_Neutral; break;
case 1: at->charge = cH_Cation; break;
case 2: at->charge = cH_Dication; break;
case -1: at->charge = cH_Anion; break;
case -2: at->charge = cH_Dianion; break;
}
}
if(ok) {
tmp = PyObject_GetAttrString(atom,"resn");
if (tmp)
ok = PConvPyObjectToStrMaxClean(tmp,at->residue,RES_SIZE-1);
if(!ok)
err_message("ChampModel2Pat","can't read resn");
Py_XDECREF(tmp);
}
if(ok) {
tmp = PyObject_GetAttrString(atom,"symbol");
if (tmp)
ok = PConvPyObjectToStrMaxClean(tmp,at->symbol,SYM_SIZE-1);
if(!ok)
err_message("ChampModel2Pat","can't read symbol");
c = at->symbol;
std_flag = false;
switch(*c) {
case 'C':
switch(*(c+1)) {
case 'l':
case 'L':
ChampParseAliphaticAtom(I,c,cur_atom,cH_Cl,2,false);
std_flag = true;
break;
default:
ChampParseAliphaticAtom(I,c,cur_atom,cH_C,1,true);
std_flag = true;
break;
}
break;
case 'H': /* nonstandard */
ChampParseAliphaticAtom(I,c,cur_atom,cH_H,1,false);
std_flag = true;
break;
case 'N':
ChampParseAliphaticAtom(I,c,cur_atom,cH_N,1,true);
std_flag = true;
break;
case 'O':
ChampParseAliphaticAtom(I,c,cur_atom,cH_O,1,true);
std_flag = true;
break;
case 'B':
switch(*(c+1)) {
case 'r':
case 'R':
ChampParseAliphaticAtom(I,c,cur_atom,cH_Br,2,false);
std_flag = true;
break;
default:
ChampParseAliphaticAtom(I,c,cur_atom,cH_B,1,true);
std_flag = true;
break;
}
break;
case 'A':
ChampParseAliphaticAtom(I,c,cur_atom,cH_A,1,true);
std_flag = true;
break;
case 'P':
ChampParseAliphaticAtom(I,c,cur_atom,cH_P,1,true);
std_flag = true;
break;
case 'S':
ChampParseAliphaticAtom(I,c,cur_atom,cH_S,1,true);
std_flag = true;
break;
case 'F':
ChampParseAliphaticAtom(I,c,cur_atom,cH_F,1,false);
std_flag = true;
break;
case 'I':
ChampParseAliphaticAtom(I,c,cur_atom,cH_I,1,false);
std_flag = true;
break;
case 'E':
ChampParseAliphaticAtom(I,c,cur_atom,cH_E,1,false);
std_flag = true;
break;
case 'G':
ChampParseAliphaticAtom(I,c,cur_atom,cH_G,1,false);
std_flag = true;
break;
case 'J':
ChampParseAliphaticAtom(I,c,cur_atom,cH_J,1,false);
std_flag = true;
break;
case 'L':
ChampParseAliphaticAtom(I,c,cur_atom,cH_L,1,false);
std_flag = true;
break;
case 'M':
ChampParseAliphaticAtom(I,c,cur_atom,cH_M,1,false);
std_flag = true;
break;
case 'Q':
ChampParseAliphaticAtom(I,c,cur_atom,cH_Q,1,false);
std_flag = true;
break;
case 'R':
ChampParseAliphaticAtom(I,c,cur_atom,cH_R,1,false);
std_flag = true;
break;
case 'T':
ChampParseAliphaticAtom(I,c,cur_atom,cH_T,1,false);
std_flag = true;
break;
case 'X':
ChampParseAliphaticAtom(I,c,cur_atom,cH_X,1,false);
std_flag = true;
break;
case 'Z':
ChampParseAliphaticAtom(I,c,cur_atom,cH_Z,1,false);
std_flag = true;
break;
}
if(!std_flag) {
ChampParseStringAtom(I,c,cur_atom,strlen(c));
}
Py_XDECREF(tmp);
}
if(!ok)
break;
}
}
bondList = PyObject_GetAttrString(model,"bond");
if(bondList)
nBond = PyList_Size(bondList);
else
ok=err_message("ChampModel2Pat","can't get bond list");
if(ok) {
for(a=nBond-1;a>=0;a--) /* reverse order */
{
bnd = PyList_GetItem(bondList,a);
if(!bnd)
ok=err_message("ChampModel2Pat","can't get bond");
cur_bond = ListElemNewZero(&I->Bond);
bd = I->Bond + cur_bond;
bd->link = last_bond;
last_bond = cur_bond;
bd->chempy_bond = bnd;
Py_INCREF(bd->chempy_bond);
if(ok) {
tmp = PyObject_GetAttrString(bnd,"order");
if (tmp)
ok = PConvPyObjectToInt(tmp,&order);
if(!ok)
err_message("ChampModel2Pat","can't read bond order");
switch(order) {
case 1: bd->order = cH_Single; break;
case 2: bd->order = cH_Double; break;
case 3: bd->order = cH_Triple; break;
case 4: bd->order = cH_Single; bd->class = cH_Aromatic|cH_Pi; break;
}
Py_XDECREF(tmp);
}
index = PyObject_GetAttrString(bnd,"index");
if(!index)
ok=err_message("ChampModel2Pat","can't get bond indices");
else {
if(ok) ok = PConvPyObjectToInt(PyList_GetItem(index,0),&atom1);
if(ok) ok = PConvPyObjectToInt(PyList_GetItem(index,1),&atom2);
if(!ok)
err_message("ChampModel2Pat","can't read bond atoms");
else {
atom1 = atom_index[atom1];
atom2 = atom_index[atom2];
if(order==4) {
I->Atom[atom1].class |= cH_Aromatic|cH_Pi;
I->Atom[atom2].class |= cH_Aromatic|cH_Pi;
}
bd->atom[0]=atom1;
bd->atom[1]=atom2;
bd->pri[0]=0;
bd->pri[1]=0;
ChampAddBondToAtom(I,atom1,cur_bond);
ChampAddBondToAtom(I,atom2,cur_bond);
}
}
Py_XDECREF(index);
}
}
Py_XDECREF(atomList);
Py_XDECREF(bondList);
if(PyObject_HasAttrString(model,"molecule")) {
molec = PyObject_GetAttrString(model,"molecule"); /* returns new reference */
} else {
molec = NULL;
}
mac_free(atom_index);
result = ListElemNewZero(&I->Pat);
if(result)
{
I->ActivePatList = ListElemPushInt(&I->Int,I->ActivePatList,result);
I->Pat[result].atom = cur_atom;
I->Pat[result].bond = cur_bond;
I->Pat[result].chempy_molecule = molec;
ChampPatReindex(I,result);
}
return(result);
}
/* ===============================================================
* Class-specific Memory Management
* =============================================================== */
CChamp *ChampNew(void) {
CChamp *I;
feedback_Init(); /* only has side-effects the first time */
ChiralInit();
I = mac_calloc(CChamp);
I->Atom = (ListAtom*)ListNew(8000,sizeof(ListAtom));
I->Bond = (ListBond*)ListNew(32000,sizeof(ListBond));
I->Int = (ListInt*)ListNew(16000,sizeof(ListInt));
I->Int2 = (ListInt2*)ListNew(16000,sizeof(ListInt2));
I->Int3 = (ListInt3*)ListNew(16000,sizeof(ListInt3));
I->Tmpl = (ListTmpl*)ListNew(1000,sizeof(ListTmpl));
I->Targ = (ListTarg*)ListNew(1000,sizeof(ListTarg));
I->Pat = (ListPat*)ListNew(1000,sizeof(ListPat));
I->Scope = (ListScope*)ListNew(100,sizeof(ListScope));
I->Str = StrBlockNew(1000);
I->Match = (ListMatch*)ListNew(1000,sizeof(ListMatch));
I->ActivePatList = 0;
return I;
}
void ChampMemoryDump(CChamp *I)
{
printf(" ChampMemory: Pat %d\n",ListGetNAlloc(I->Pat));
printf(" ChampMemory: Atom %d\n",ListGetNAlloc(I->Atom));
printf(" ChampMemory: Bond %d\n",ListGetNAlloc(I->Bond));
printf(" ChampMemory: Int %d\n",ListGetNAlloc(I->Int));
printf(" ChampMemory: Int2 %d\n",ListGetNAlloc(I->Int2));
printf(" ChampMemory: Int3 %d\n",ListGetNAlloc(I->Int3));
printf(" ChampMemory: Tmpl %d\n",ListGetNAlloc(I->Tmpl));
printf(" ChampMemory: Targ %d\n",ListGetNAlloc(I->Targ));
printf(" ChampMemory: Scope %d\n",ListGetNAlloc(I->Scope));
printf(" ChampMemory: Match %d\n",ListGetNAlloc(I->Match));
}
int ChampMemoryUsage(CChamp *I)
{
return(
ListGetNAlloc(I->Pat)+
ListGetNAlloc(I->Atom)+
ListGetNAlloc(I->Bond)+
ListGetNAlloc(I->Int)+
ListGetNAlloc(I->Int2)+
ListGetNAlloc(I->Int3)+
ListGetNAlloc(I->Tmpl)+
ListGetNAlloc(I->Targ)+
ListGetNAlloc(I->Scope)+
ListGetNAlloc(I->Match));
}
void ChampFree(CChamp *I) {
while(I->ActivePatList) {
ChampPatFree(I,I->ActivePatList); /* will update ActivePatList */
}
ListFree(I->Pat);
ListFree(I->Atom);
ListFree(I->Bond);
ListFree(I->Int);
ListFree(I->Int2);
ListFree(I->Int3);
ListFree(I->Tmpl);
ListFree(I->Targ);
ListFree(I->Scope);
ListFree(I->Match);
StrBlockFree(I->Str);
mac_free(I);
}
void ChampPatReindex(CChamp *I,int index)
{
ListPat *pt;
ListAtom *at;
ListBond *bd;
int ai,bi;
int b_idx=0;
int a_idx=0;
if(index) {
pt = I->Pat + index;
ai = pt->atom;
while(ai) {
at = I->Atom + ai;
at->index = a_idx++;
ai = at->link;
}
bi = pt->bond;
while(bi) {
bd = I->Bond + bi;
bd->index = b_idx++;
bi = bd->link;
}
}
}
void ChampPatFree(CChamp *I,int index)
{
ListPat *pt;
if(index) {
pt = I->Pat + index;
ChampAtomFreeChain(I,I->Pat[index].atom);
ChampBondFreeChain(I,I->Pat[index].bond);
if(pt->chempy_molecule) {Py_DECREF(pt->chempy_molecule);}
ChampUniqueListFree(I,I->Pat[index].unique_atom);
ListElemFree(I->Pat,index);
I->ActivePatList = ListElemPurgeInt(I->Int,I->ActivePatList,index);
}
}
void ChampAtomFree(CChamp *I,int atom)
{
ListAtom *at;
if(atom) {
at = I->Atom + atom;
if(at->chempy_atom) {Py_DECREF(at->chempy_atom);}
}
ListElemFree(I->Atom,atom);
}
void ChampAtomFreeChain(CChamp *I,int atom)
{
ListAtom *at;
int i;
i = atom;
while(i) {
at = I->Atom + i;
if(at->chempy_atom) {Py_DECREF(at->chempy_atom);}
i = I->Atom[i].link;
}
ListElemFreeChain(I->Atom,atom);
}
void ChampBondFree(CChamp *I,int bond)
{
ListBond *bd;
if(bond) {
bd = I->Bond + bond;
if(bd->chempy_bond) {Py_DECREF(bd->chempy_bond);}
}
ListElemFree(I->Bond,bond);
}
void ChampBondFreeChain(CChamp *I,int bond)
{
ListBond *at;
int i;
i = bond;
while(i) {
at = I->Bond + i;
if(at->chempy_bond) {Py_DECREF(at->chempy_bond);}
i = I->Bond[i].link;
}
ListElemFreeChain(I->Bond,bond);
}
/* ===============================================================
* Unique atoms in patterns
* =============================================================== */
/* Unique atoms are stored in Int3 as
0: representative atom index
1: total count of identical atoms
2: list index (I->Int) of all identical atoms
*/
int ChampUniqueListNew(CChamp *I,int atom, int unique_list)
{
/* returns new root for unique_list */
int cur_atom,next_atom;
int unique,unique_atom;
int ident;
cur_atom = atom;
while(cur_atom) { /* iterate through each atom in the molecule */
next_atom = I->Atom[cur_atom].link;
unique = unique_list;
while(unique) { /* check to see if it matches an existing unique atom */
unique_atom = I->Int3[unique].value[0];
if(ChampPatIdentical(I->Atom+cur_atom,I->Atom+unique_atom)) {
/* if so, then just add this atom the match list for this unique atom */
I->Int3[unique].value[1]++; /* count */
ident = ListElemNew(&I->Int);
I->Int[ident].link = I->Int3[unique].value[2];
I->Int[ident].value = cur_atom;
I->Int3[unique].value[2] = ident;
cur_atom = 0; /* indicate a miss */
break;
}
unique = I->Int3[unique].link;
}
if(cur_atom) { /* found a new unique atom */
unique_list = ListElemPush(&I->Int3,unique_list);
I->Int3[unique_list].value[0] = cur_atom;
I->Int3[unique_list].value[1] = 1;
#if 0
ChampAtomDump(I,cur_atom);
#endif
ident = ListElemNew(&I->Int);
I->Int[ident].value = cur_atom;
I->Int3[unique_list].value[2] = ident; /* init list of other identical atoms */
}
cur_atom = next_atom;
}
#if 0
printf("\n");
#endif
return(unique_list);
}
void ChampUniqueListFree(CChamp *I,int unique_list)
{
int unique = unique_list;
while(unique) {
ListElemFreeChain(I->Int,I->Int3[unique].value[2]);
unique = I->Int3[unique].link;
}
ListElemFreeChain(I->Int3,unique_list);
}
/* ===============================================================
* Comparison
* =============================================================== */
int ChampMatch_1V1_B(CChamp *I,int pattern,int target)
{
ChampPreparePattern(I,pattern);
ChampPrepareTarget(I,target);
return(ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
1,NULL,false));
}
int ChampMatch_1V1_N(CChamp *I,int pattern,int target,int limit,int tag_mode)
{
ChampPreparePattern(I,pattern);
ChampPrepareTarget(I,target);
return(ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
limit,NULL,tag_mode));
}
int ChampMatch_1V1_Map(CChamp *I,int pattern,int target,int limit,int tag_mode)
{
int match_start = 0;
ChampPreparePattern(I,pattern);
ChampPrepareTarget(I,target);
ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
limit,&match_start,tag_mode);
return(match_start);
}
int ChampMatch_1VN_N(CChamp *I,int pattern,int list)
{
int target;
int c = 0;
ChampPreparePattern(I,pattern);
while(list) {
target = I->Int[list].value;
ChampPrepareTarget(I,target);
if(ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
1,NULL,false))
c++;
list = I->Int[list].link;
}
return(c);
}
int ChampExact_1VN_N(CChamp *I,int pattern, int list)
{
int target;
int c = 0;
/* NOTE: should call generalize on molecules first!!! */
ChampPreparePattern(I,pattern);
while(list) {
target = I->Int[list].value;
if(pattern==target)
c++;
else {
ChampPrepareTarget(I,target);
if(ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
1,NULL,false)) {
if(ChampMatch(I,target,pattern,
ChampFindUniqueStart(I,target,pattern,NULL),
1,NULL,false)) {
c++;
}
}
}
list = I->Int[list].link;
}
return(c);
}
int ChampMatch_NV1_N(CChamp *I,int list,int target,int limit,int tag_mode)
{
int pattern;
int c = 0;
ChampPrepareTarget(I,target);
while(list) {
pattern = I->Int[list].value;
ChampPreparePattern(I,pattern);
if(ChampMatch(I,pattern,target,
ChampFindUniqueStart(I,pattern,target,NULL),
limit,NULL,tag_mode))
c++;
list = I->Int[list].link;
}
return(c);
}
int ChampFindUniqueStart(CChamp *I,int template, int target,int *multiplicity)
{ /* returns zero for no match */
int unique_tmpl,unique_targ;
int tmpl_atom,targ_atom;
int best_unique = 0;
int score;
int best_score = 0;
unique_tmpl = I->Pat[template].unique_atom;
while(unique_tmpl) {
tmpl_atom = I->Int3[unique_tmpl].value[0];
unique_targ = I->Pat[target].unique_atom;
score = 0;
while(unique_targ) {
targ_atom = I->Int3[unique_targ].value[0];
if(ChampAtomMatch(I->Atom + tmpl_atom,I->Atom + targ_atom))
score += I->Int3[unique_targ].value[1];
unique_targ = I->Int3[unique_targ].link;
}
#ifdef MATCHDEBUG
if(!score) {
printf("unable to match: ");
ChampAtomDump(I,tmpl_atom);
}
#endif
if(!score) return 0; /* no matched atom */
score = score * I->Int3[unique_tmpl].value[1]; /* calculate multiplicity */
if((!best_score)||(score<best_score)) {
best_unique = unique_tmpl;
best_score = score;
}
unique_tmpl = I->Int3[unique_tmpl].link;
}
if(multiplicity) *multiplicity=best_score;
return(best_unique);
}
void ChampMatchFreeChain(CChamp *I,int match_idx)
{
int next_idx;
while(match_idx) {
next_idx = I->Match[match_idx].link;
ChampMatchFree(I,match_idx);
match_idx = next_idx;
}
}
void ChampMatchFree(CChamp *I,int match_idx)
{
if(match_idx) {
ListElemFreeChain(I->Int2,I->Match[match_idx].atom);
ListElemFreeChain(I->Int2,I->Match[match_idx].bond);
ListElemFree(I->Match,match_idx);
}
}
void ChampMatchDump(CChamp *I,int match_idx)
{
int m_atom_idx,atom_idx;
int m_bond_idx,bond_idx;
if(match_idx) {
m_atom_idx = I->Match[match_idx].atom;
m_bond_idx = I->Match[match_idx].bond;
while(m_atom_idx) {
atom_idx = I->Int2[m_atom_idx].value[0];
ChampAtomDump(I,atom_idx);
printf("(%2d,%2d)-",atom_idx,I->Atom[atom_idx].index);
atom_idx = I->Int2[m_atom_idx].value[1];
ChampAtomDump(I,atom_idx);
printf("(%2d,%2d)\n",atom_idx,I->Atom[atom_idx].index);
m_atom_idx = I->Int2[m_atom_idx].link;
}
while(m_bond_idx) {
bond_idx = I->Int2[m_bond_idx].value[0];
printf("%2d:%2d(%2d)-",I->Bond[bond_idx].atom[0],
I->Bond[bond_idx].atom[1],bond_idx);
bond_idx = I->Int2[m_bond_idx].value[1];
printf("%2d:%2d(%2d)\n",I->Bond[bond_idx].atom[0],
I->Bond[bond_idx].atom[1],bond_idx);
m_bond_idx = I->Int2[m_bond_idx].link;
}
}
}
int ChampMatch(CChamp *I,int template,int target,int unique_start,
int n_wanted,int *match_start,int tag_mode)
{ /* returns whether or not substructure exists, but doesn't do alignment */
int n_match = 0;
int start_targ;
int tmpl_atom,targ_atom;
int rep_targ_atom;
int unique_targ;
#ifdef MATCHDEBUG
printf("\n\n ChampMatch: temp %d targ %d uniq %d n_want %d start %p tag %d\n",
template,target,unique_start,n_wanted,match_start,tag_mode);
#endif
/* we'll only need to start the search from the represenatative atom for this type
(it isn't necc. to iterate through the others, since we'll be trying all
combinations within the target...
*/
if(unique_start) {
tmpl_atom = I->Int3[unique_start].value[0];
unique_targ = I->Pat[target].unique_atom;
while(unique_targ) { /* iterate through all unique types of target atoms */
rep_targ_atom = I->Int3[unique_targ].value[0];
if(ChampAtomMatch(I->Atom + tmpl_atom,I->Atom + rep_targ_atom))
{
start_targ = I->Int3[unique_targ].value[2];
while(start_targ) { /* iterate through all target atoms of this type */
targ_atom = I->Int[start_targ].value;
/* we now have starting atoms for each structure */
n_match += ChampMatch2(I,template,target,
tmpl_atom,targ_atom,
(n_wanted-n_match),
match_start,tag_mode);
start_targ = I->Int[start_targ].link;
if(n_match>=n_wanted) break;
}
}
if(n_match>=n_wanted) break;
unique_targ = I->Int3[unique_targ].link;
}
}
return(n_match);
}
int ChampMatch2(CChamp *I,int template,int target,
int start_tmpl,int start_targ,int n_wanted,
int *match_start,int tag_mode)
{ /* does the template covalent tree match the target? */
/* basic algorithm is a multilayer mark-and-sweep traversal
through all atoms and bonds starting from the
two nucleating atoms, which are assumed to be equivalent */
int n_match = 0;
int stereo_template = false;
#ifdef MATCHDEBUG
printf("\n\n ChampMatch2: %d %d\n",start_tmpl,start_targ);
#endif
{
/* first, initialize the marks */
ListAtom *at;
ListBond *bd;
int cur_atom,cur_bond;
/* template uses mark_tmpl */
cur_atom = I->Pat[template].atom;
while(cur_atom) {
at = I->Atom+cur_atom;
at->mark_tmpl=0;
if(at->stereo) stereo_template = true;
cur_atom = at->link;
}
cur_bond = I->Pat[template].bond;
while(cur_bond) {
bd = I->Bond+cur_bond;
bd->mark_tmpl=0;
cur_bond = bd->link;
}
/* target uses mark_targ */
cur_atom = I->Pat[target].atom;
while(cur_atom) {
at = I->Atom+cur_atom;
at->mark_targ=0;
cur_atom = at->link;
}
cur_bond = I->Pat[target].bond;
while(cur_bond) {
bd = I->Bond+cur_bond;
bd->mark_targ=0;
cur_bond = bd->link;
}
}
/* okay, now start our sweep... */
{
int tmpl_stack=0;
int targ_stack=0;
int tmpl_par;
int tmpl_idx,targ_idx;
int bond_off;
int bond_idx,atom_idx;
int mode=0; /* 0: iterate template atom, 1: need target atom */
int done_flag=false;
int match_flag;
int bond_start;
int atom_start;
int stereo_match;
ListTmpl *tmpl_ent,*parent_ent;
ListTarg *targ_ent;
ListAtom *tmpl_at,*targ_at,*base_at;
ListMatch *match_ent;
ListInt2 *int2;
/* stack entries contain:
0: the respective bond & atom indices
1: current bond index in atom
2: backward entry index (branch point)
*/
/* initialize target stack */
targ_stack = ListElemPush(&I->Targ,targ_stack);
targ_ent = I->Targ + targ_stack;
targ_ent->atom = start_targ;
targ_ent->bond = 0; /* to get here */
/* initalize template stack */
tmpl_stack = ListElemPush(&I->Tmpl,tmpl_stack);
tmpl_ent = I->Tmpl + tmpl_stack;
tmpl_ent->atom = start_tmpl;
tmpl_ent->parent = 0;
tmpl_ent->bond = 0; /* bond index to get here */
tmpl_ent->match = targ_stack; /* matching target index */
tmpl_ent->targ_start = 0; /* for matches */
/* initialize marks */
I->Atom[start_tmpl].mark_tmpl=1;
I->Atom[start_tmpl].first_tmpl = tmpl_stack;
I->Atom[start_targ].mark_targ=1;
I->Atom[start_targ].first_targ = targ_stack;
while(!done_flag) {
if(!targ_stack) break;
#ifdef MATCHDEBUG
printf("============================\n");
printf("tmpl: ");
tmpl_ent = I->Tmpl + tmpl_stack;
while(1) {
ChampAtomDump(I,tmpl_ent->atom);
printf("(%2d) ",tmpl_ent->atom);
if(tmpl_ent->link)
tmpl_ent=I->Tmpl + tmpl_ent->link;
else
break;
}
printf("\ntarg: ");
targ_ent = I->Targ + targ_stack;
while(1) {
ChampAtomDump(I,targ_ent->atom);
printf("(%2d) ",targ_ent->atom);
if(targ_ent->link)
targ_ent=I->Targ + targ_ent->link;
else
break;
}
printf("\n");
#endif
switch(mode) {
case 0: /* iterate template atom */
/* first, see if there is an open bond somewhere in the tree... */
tmpl_par = tmpl_stack; /* start from last entry */
while(tmpl_par) {
tmpl_ent = I->Tmpl + tmpl_par;
tmpl_at = I->Atom + tmpl_ent->atom; /* get atom record */
bond_off = 0;
bond_idx = tmpl_at->bond[bond_off];
while(bond_idx) { /* iterate over all bonds */
if(I->Bond[bond_idx].mark_tmpl) { /* skip over marked bonds...*/
bond_off++;
bond_idx = tmpl_at->bond[bond_off];
} else
break; /* found an open bond */
}
if(bond_idx) { /* there is an open bond */
#ifdef MATCHDEBUG
printf(" tmpl: bond_idx %d is open (%2d,%2d)\n",bond_idx,
I->Bond[bond_idx].atom[0],I->Bond[bond_idx].atom[1]);
#endif
atom_idx = I->Bond[bond_idx].atom[1];
if(atom_idx==tmpl_ent->atom)
atom_idx = I->Bond[bond_idx].atom[0];
I->Bond[bond_idx].mark_tmpl++; /* mark bond "in use" */
tmpl_stack = ListElemPush(&I->Tmpl,tmpl_stack); /* allocate new record for atom */
tmpl_ent = I->Tmpl + tmpl_stack;
tmpl_ent->atom = atom_idx; /* record which atom */
tmpl_ent->parent = tmpl_par; /* record parent identity */
tmpl_ent->bond = bond_idx; /* record bond index */
tmpl_ent->targ_start = 0; /* launch searches from this bond in parent */
tmpl_ent->match = 0; /* no match yet */
tmpl_at = I->Atom + atom_idx;
tmpl_at->mark_tmpl++; /* mark atom "in use" */
if(tmpl_at->mark_tmpl==1) { /* record where it was used */
tmpl_at->first_tmpl=tmpl_stack;
}
#ifdef MATCHDEBUG
printf(" tmpl: created tmpl_idx %d tmpl_par %d\n",
tmpl_stack,tmpl_par);
printf(" targ: tmpl atom %d:",tmpl_ent->atom);
ChampAtomDump(I,tmpl_ent->atom);
printf("\n");
#endif
mode = 1; /* now we need to find matching template bond/atom */
break; /* ...leave loop to hunt */
} else {
#ifdef MATCHDEBUG
printf(" tmpl: nothing found in level %d...\n",tmpl_par);
#endif
tmpl_par = tmpl_ent->parent; /* otherwise, proceed up tree looking for
nearest open bond */
}
}
if(!tmpl_par) { /* no open bonds-> complete match */
#ifdef MATCHDEBUG2
printf(" tmpl: EXACT MATCH DETECTED\n");
printf(" %d %d %d %d\n",template,target,start_tmpl,start_targ);
printf("tmpl: ");
tmpl_ent = I->Tmpl + tmpl_stack;
while(1) {
ChampAtomDump(I,tmpl_ent->atom);
printf("(%2d) ",tmpl_ent->atom);
if(tmpl_ent->link)
tmpl_ent=I->Tmpl + tmpl_ent->link;
else
break;
}
printf("\ntarg: ");
targ_ent = I->Targ + targ_stack;
while(1) {
ChampAtomDump(I,targ_ent->atom);
printf("(%2d) ",targ_ent->atom);
if(targ_ent->link)
targ_ent=I->Targ + targ_ent->link;
else
break;
}
printf("\n");
#endif
stereo_match = true;
if(stereo_template) { /* must check stereochemistry */
int tmpl_idx2;
int targ_idx2;
int bond_idx2;
int bond_off2;
int n_pri;
int tmpl_pri[MAX_BOND];
int targ_pri[MAX_BOND];
ListTmpl *tmpl_ent2;
ListTarg *targ_ent2;
ListBond *tmpl_bd,*targ_bd;
ListAtom *tmpl_at1,*targ_at1;
tmpl_idx = tmpl_stack; /* prepare to traverse... */
while(tmpl_idx) {
tmpl_ent = I->Tmpl + tmpl_idx;
I->Atom[tmpl_ent->atom].mark_read = false;
tmpl_idx = tmpl_ent->link;
}
tmpl_idx = tmpl_stack;
targ_idx = targ_stack;
while(tmpl_idx&&targ_idx&&stereo_match) {
tmpl_ent = I->Tmpl + tmpl_idx;
targ_ent = I->Targ + targ_idx;
tmpl_at1 = I->Atom + tmpl_ent->atom;
targ_at1 = I->Atom + targ_ent->atom;
if(!tmpl_at1->mark_read) { /* only compare non-virtual atoms */
tmpl_at1->mark_read = true;
if(tmpl_at1->stereo) {
if(!targ_at1->stereo) {
/*
printf("failure 1 %d %d %d %d \n",tmpl_at1->stereo,targ_at1->stereo,
tmpl_ent->atom,targ_ent->atom);*/
stereo_match = false;
}
/* this is a stereo-center -- so locate the corresponding bond priorities in
template and the target */
n_pri = 0;
if(tmpl_at1->imp_hydro==1) { /* deal with implicit hydrogen case */
tmpl_pri[n_pri]=0; /* considered first, since atom priorities always > 0 */
if(targ_at1->imp_hydro==1)
targ_pri[n_pri]=0;
else if(targ_at1->tot_hydro==1) {
/* track down the wayward hydrogen priority */
bond_off2 = 0;
bond_idx2 = targ_at1->bond[bond_off2];
while(bond_idx2) { /* iterate over all bonds */
targ_bd = I->Bond + bond_idx2;
if(I->Atom[targ_bd->atom[0]].atom&cH_H) {
targ_pri[n_pri] = targ_bd->pri[0];
break;
} else if(I->Atom[targ_bd->atom[1]].atom&cH_H) {
targ_pri[n_pri] = targ_bd->pri[1];
break;
}
bond_off2++;
bond_idx2 = targ_at1->bond[bond_off2];
if(!bond_idx2)
err_message("ChampMatch2","can't locate explicit hydrogen on target!");
}
/* failure? */
} else {
printf("failure 2\n");
stereo_match = false;
}
n_pri++;
}
if(!stereo_match)
break;
tmpl_idx2 = tmpl_stack; /* prepare to traverse bonds... */
while(tmpl_idx2) {
tmpl_ent2 = I->Tmpl + tmpl_idx2;
if(tmpl_ent2->bond)
I->Bond[tmpl_ent2->bond].mark_read = false;
tmpl_idx2 = tmpl_ent2->link;
}
tmpl_idx2 = tmpl_stack;
targ_idx2 = targ_stack;
while(tmpl_idx2&&targ_idx2) {
tmpl_ent2 = I->Tmpl + tmpl_idx2;
targ_ent2 = I->Targ + targ_idx2;
if(tmpl_ent2->bond&&targ_ent2->bond) {
tmpl_bd = I->Bond + tmpl_ent2->bond;
if(!tmpl_bd->mark_read) {
tmpl_bd->mark_read = true;
targ_bd = I->Bond + targ_ent2->bond;
/* if matched bond touches this atom, then
get bond priority for the atom */
if(tmpl_bd->atom[0]==tmpl_ent->atom) {
tmpl_pri[n_pri]=tmpl_bd->pri[0];
if(targ_bd->atom[0]==targ_ent->atom) {
targ_pri[n_pri]=targ_bd->pri[0];
} else { /* assuming targ_bd->atom[1]==targ_ent->atom */
targ_pri[n_pri]=targ_bd->pri[1];
}
if(n_pri<4)
n_pri++;
else {
err_message("ChampMatch2","too many connections on chiral atom!");
}
} else if(tmpl_bd->atom[1]==tmpl_ent->atom) {
tmpl_pri[n_pri]=tmpl_bd->pri[1];
if(targ_bd->atom[0]==targ_ent->atom) {
targ_pri[n_pri]=targ_bd->pri[0];
} else { /* assuming targ_bd->atom[1]==targ_ent->atom */
targ_pri[n_pri]=targ_bd->pri[1];
}
if(n_pri<4)
n_pri++;
else {
err_message("ChampMatch2","too many connections on chiral atom!");
}
}
}
}
tmpl_idx2 = tmpl_ent2->link;
targ_idx2 = targ_ent2->link;
}
if(n_pri<4) {
err_message("ChampMatch2","stereo comparison on achiral atom.");
stereo_match = false;
} else {
/* now test for a handedness match */
int tmpl_hand,targ_hand;
tmpl_hand = ChiralHandedness(tmpl_pri);
targ_hand = ChiralHandedness(targ_pri);
if(tmpl_at1->stereo==targ_at1->stereo) { /* same clockwizedness */
if(tmpl_hand!=targ_hand) { /* opposite handedness */
stereo_match=false;
}
} else { /* opposite clockwizedness */
if(tmpl_hand==targ_hand) { /* same handedness */
stereo_match = false;
}
}
}
}
}
tmpl_idx = tmpl_ent->link;
targ_idx = targ_ent->link;
}
}
if(stereo_match) {
n_match++;
if(n_wanted>0)
if(n_match>=n_wanted) done_flag=true;
if(match_start) {
(*match_start) = ListElemPush(&I->Match,*match_start);
match_ent = I->Match + (*match_start);
atom_start=0;
bond_start=0;
tmpl_idx = tmpl_stack; /* prepare to traverse... */
while(tmpl_idx) {
tmpl_ent = I->Tmpl + tmpl_idx;
I->Atom[tmpl_ent->atom].mark_read = false;
tmpl_idx = tmpl_ent->link;
}
tmpl_idx = tmpl_stack;
targ_idx = targ_stack;
while(tmpl_idx&&targ_idx) {
tmpl_ent = I->Tmpl + tmpl_idx;
targ_ent = I->Targ + targ_idx;
if(!I->Atom[tmpl_ent->atom].mark_read) { /* save non-virtual atom match */
I->Atom[tmpl_ent->atom].mark_read = true;
atom_start = ListElemPush(&I->Int2,atom_start);
int2 = I->Int2 + atom_start;
int2->value[0] = tmpl_ent->atom;
int2->value[1] = targ_ent->atom;
}
if(tmpl_ent->bond) { /* record bond match */
bond_start = ListElemPush(&I->Int2,bond_start);
int2 = I->Int2 + bond_start;
int2->value[0] = tmpl_ent->bond;
int2->value[1] = targ_ent->bond;
}
tmpl_idx = tmpl_ent->link;
targ_idx = targ_ent->link;
}
match_ent->atom = atom_start;
match_ent->bond = bond_start;
#ifdef MATCHDEBUG2
/* ChampPatDump(I,template);
ChampPatDump(I,target);*/
ChampMatchDump(I,*match_start);
#endif
}
if(tag_mode) { /* are we using tags to mark atoms and bonds? */
tmpl_idx = tmpl_stack; /* prepare to read... */
while(tmpl_idx) {
tmpl_ent = I->Tmpl + tmpl_idx;
I->Atom[tmpl_ent->atom].mark_read = false;
tmpl_idx = tmpl_ent->link;
}
tmpl_idx = tmpl_stack;
targ_idx = targ_stack;
while(tmpl_idx&&targ_idx) {
tmpl_ent = I->Tmpl + tmpl_idx;
targ_ent = I->Targ + targ_idx;
if(!I->Atom[tmpl_ent->atom].mark_read) { /* save non-virtual atom match */
I->Atom[tmpl_ent->atom].mark_read = true;
if(tag_mode==cTag_merge) { /* merge */
I->Atom[targ_ent->atom].tag |= I->Atom[tmpl_ent->atom].tag;
I->Atom[targ_ent->atom].tag &= (0xFFFFFFFF^I->Atom[tmpl_ent->atom].not_tag);
} else if(tag_mode==cTag_copy) { /* copy */
I->Atom[targ_ent->atom].tag = I->Atom[tmpl_ent->atom].tag;
}
}
if(tmpl_ent->bond) { /* record bond match */
if(tag_mode==cTag_merge) {
I->Bond[targ_ent->bond].tag |= I->Bond[tmpl_ent->bond].tag;
I->Bond[targ_ent->bond].tag &= (0xFFFFFFFF^I->Bond[tmpl_ent->bond].not_tag);
} else if(tag_mode==cTag_copy) {
I->Bond[targ_ent->bond].tag = I->Bond[tmpl_ent->bond].tag;
}
}
tmpl_idx = tmpl_ent->link;
targ_idx = targ_ent->link;
}
}
}
/* back-out the last target match... */
targ_ent = I->Targ + targ_stack;
atom_idx = targ_ent->atom;
I->Atom[atom_idx].mark_targ--; /* free-up atom */
bond_idx = targ_ent->bond;
I->Bond[bond_idx].mark_targ--; /* free-up bond */
targ_stack = ListElemPop(I->Targ,targ_stack);
mode = 1; /* now we need to find another matching template bond/atom */
}
break;
case 1:
/* try to locate matching target atom & bond */
tmpl_ent = I->Tmpl + tmpl_stack;
bond_off = tmpl_ent->targ_start; /* start with this bond */
parent_ent = I->Tmpl + tmpl_ent->parent; /* get parent of current template atom */
targ_ent = I->Targ + parent_ent->match; /* start from target atom which
matches the parent in template */
base_at = I->Atom + targ_ent->atom;
#ifdef MATCHDEBUG
printf(" targ: tmpl_stack %d bond_off %d\n",tmpl_stack,bond_off);
printf(" targ: match %d targ root atom %d:",parent_ent->match,targ_ent->atom);
ChampAtomDump(I,targ_ent->atom);
printf("\n");
printf(" targ: bonds: ");
bond_start = bond_off;
bond_idx = base_at->bond[bond_start];
while(bond_idx) { /* iterate over all bonds */
bond_start++;
printf("%d ",bond_idx);
bond_idx = base_at->bond[bond_start];
}
printf("\n");
#endif
bond_idx = base_at->bond[bond_off];
while(bond_idx) { /* iterate over all bonds */
#ifdef MATCHDEBUG
printf(" targ: trying %d bond_idx %d (%2d-%2d)\n",bond_off,bond_idx,
I->Bond[bond_idx].atom[0],I->Bond[bond_idx].atom[1]);
#endif
match_flag=false;
tmpl_ent->targ_start = bond_off + 1; /* insure we never duplicate attempt */
if(!I->Bond[bond_idx].mark_targ) { /* is bond unmarked...*/
atom_idx = I->Bond[bond_idx].atom[1]; /* find other atom */
if(atom_idx==targ_ent->atom)
atom_idx = I->Bond[bond_idx].atom[0];
tmpl_at = I->Atom+tmpl_ent->atom;
targ_at = I->Atom+atom_idx;
if((tmpl_at->mark_tmpl-targ_at->mark_targ)==1) {/* is atom available? */
if(ChampBondMatch(I->Bond+tmpl_ent->bond,
I->Bond+bond_idx)) /* does bond match? */ {
if(ChampAtomMatch(tmpl_at,targ_at)) /* does atom match? */
{
match_flag=true;
if(targ_at->mark_targ==1) { /* virtual */
/* verify that matching virtual atoms correspond to matching real atoms */
#ifdef MATCHDEBUG
printf(" targ: first_tmpl %d first_targ %d\n",
tmpl_at->first_tmpl,targ_at->first_targ);
#endif
if(I->Tmpl[tmpl_at->first_tmpl].match!=targ_at->first_targ)
match_flag=false;
}
if(match_flag) {
I->Bond[bond_idx].mark_targ++; /* mark bond "in use" */
targ_stack = ListElemPush(&I->Targ,targ_stack); /* allocate new record for atom */
targ_ent = I->Targ + targ_stack;
targ_ent->atom = atom_idx;
targ_ent->bond = bond_idx;
targ_at->mark_targ++; /* mark atom "in use" */
if(targ_at->mark_targ==1) { /* record where it was used */
targ_at->first_targ = targ_stack;
}
/* inform template entry about match */
tmpl_ent->match = targ_stack;
match_flag=true;
mode = 0; /* we have a candidate atom/bond,
so return to template */
#ifdef MATCHDEBUG
printf(" targ: MATCHED atoms %d & %d (bond %d):",
tmpl_ent->atom,
targ_ent->atom,
bond_idx);
ChampAtomDump(I,tmpl_ent->atom);
ChampAtomDump(I,targ_ent->atom);
printf("\n");
#endif
break;
}
} else {
#ifdef MATCHDEBUG
printf(" atom match failed %d vs %d ",tmpl_ent->atom,atom_idx);
ChampAtomDump(I,tmpl_ent->atom);
ChampAtomDump(I,atom_idx);
printf("\n");
#endif
}
} else {
#ifdef MATCHDEBUG
printf(" bond match failed\n");
#endif
}
}
}
if(!match_flag) {
bond_off++;
bond_idx = base_at->bond[bond_off];
}
}
if(mode) { /* unable to locate a match */
/* so back-off the previous template atom... */
tmpl_ent = I->Tmpl + tmpl_stack;
atom_idx = tmpl_ent->atom;
I->Atom[atom_idx].mark_tmpl--; /* free-up atom */
bond_idx = tmpl_ent->bond;
I->Bond[bond_idx].mark_tmpl--; /* free-up bond */
tmpl_stack = ListElemPop(I->Tmpl,tmpl_stack);
/* and back-off the previous target match */
targ_ent = I->Targ + targ_stack;
atom_idx = targ_ent->atom;
I->Atom[atom_idx].mark_targ--; /* free-up atom */
bond_idx = targ_ent->bond;
I->Bond[bond_idx].mark_targ--; /* free-up bond */
targ_stack = ListElemPop(I->Targ,targ_stack);
}
}
}
ListElemFreeChain(I->Tmpl,tmpl_stack);
ListElemFreeChain(I->Targ,targ_stack);
}
#ifdef MATCHDEBUG
printf(" ChampMatch2: returning n_match = %d\n",
n_match);
#endif
return n_match;
}
/* ===============================================================
* Smiles
* =============================================================== */
char *ChampParseBlockAtom(CChamp *I,char *c,int atom,int mask,int len,int not_flag)
{
ListAtom *at;
at=I->Atom+atom;
if(not_flag) {
at->not_atom |= mask;
at->neg_flag = true;
} else {
at->atom |= mask;
at->pos_flag = true;
}
at->hydro_flag=true;
PRINTFD(FB_smiles_parsing)
" ChampParseBlockAtom: called.\n"
ENDFD;
if(mask==cH_Sym) {
if(len==1) {
at->symbol[0]=*c;
at->symbol[1]=0;
} else if(len==2) {
at->symbol[0]=*c;
at->symbol[1]=*(c+1);
at->symbol[2]=0;
}
}
/* need to include code for loading symbol */
return c+len;
}
char *ChampParseAliphaticAtom(CChamp *I,char *c,int atom,int mask,int len,int imp_hyd)
{
ListAtom *at;
at=I->Atom+atom;
at->atom |= mask;
at->pos_flag = true;
at->comp_imp_hydro_flag = imp_hyd;
PRINTFD(FB_smiles_parsing)
" ChampParseAliphaticAtom: called.\n"
ENDFD;
/* need to include code for loading symbol */
return c+len;
}
char *ChampParseAromaticAtom(CChamp *I,char *c,int atom,int mask,int len,int imp_hyd)
{
ListAtom *at;
at=I->Atom+atom;
at->atom |= mask;
at->class |= cH_Aromatic;
at->pos_flag = true;
at->comp_imp_hydro_flag = imp_hyd;
PRINTFD(FB_smiles_parsing)
" ChampParseAromaticAtom: called.\n"
ENDFD;
return c+len;
}
char *ChampParseStringAtom(CChamp *I,char *c,int atom,int len)
{
ListAtom *at;
at=I->Atom+atom;
at->atom |= cH_Any;
at->symbol[0]=c[0];
at->symbol[1]=c[1];
at->pos_flag = true;
PRINTFD(FB_smiles_parsing)
" ChampParseStringAtom: called.\n"
ENDFD;
return c+len;
}
int ChampParseNumeral(char *c);
int ChampParseNumeral(char *c) {
switch(*c) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return(*c-'0');
break;
default:
return(-1);
}
}
int ChampParseAtomBlock(CChamp *I,char **c_ptr,int cur_atom)
{
int ok = true;
ListAtom *at;
char *c;
int not_flag = false;
int num;
int done=false;
int atom_seen = false;
/* int done;*/
c=*c_ptr;
at=I->Atom+cur_atom;
at->comp_imp_hydro_flag = false;
while(ok&&!done) {
switch(*c) {
case ']':
done=true;
c++;
break;
case 0:
done=true;
break;
case '!':
c++;
not_flag=true;
atom_seen = false;
break;
case ',':
c++;
atom_seen = false;
break;
case ';':
not_flag=false;
c++;
atom_seen = false;
break;
case '*': /* nonstandard */
c = ChampParseBlockAtom(I,c,cur_atom,cH_Any,1,not_flag);
atom_seen = true;
break;
case '?': /* nonstandard */
c = ChampParseBlockAtom(I,c,cur_atom,cH_NotH,1,not_flag);
atom_seen = true;
break;
case '@':
num = ChampParseNumeral(c+1);
if(num>=0) {
c+=2;
} else {
num = 1;
c++;
while(*c) {
if(*c!='@')
break;
else {
num++;
c++;
}
}
}
if(num&0x1) /* odd */
I->Atom[cur_atom].stereo = cH_Anticlock;
else
I->Atom[cur_atom].stereo = cH_Clockwise;
break;
case 'A': /* note there is no way to address the 'A' symbol ...*/
switch(*(c+1)) {
case 'c':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'g':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'l':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'm':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 's':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'u':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_class|=cH_Aliphatic;
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].class|=cH_Aliphatic;
}
c++;
}
break;
case 'a':
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_class|=cH_Aromatic;
} else {
I->Atom[cur_atom].class|=cH_Aromatic;
I->Atom[cur_atom].pos_flag=true;
}
c++;
break;
case 'B':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'i':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Br,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_B,1,not_flag);
atom_seen = true;
break;
}
break;
case 'C':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Ca,2,not_flag);
atom_seen = true;
break;
case 'd':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'o':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 's':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'u':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Cu,2,not_flag);
atom_seen = true;
break;
case 'l':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Cl,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_C,1,not_flag);
atom_seen = true;
break;
}
break;
case 'D':
switch(*(c+1)) {
case 'y':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
num = ChampParseNumeral(c+1);
if(num>=0) {
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_degree|=num_to_degree[num];
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].degree|=num_to_degree[num];
}
c+=2;
} else
ok=false;
break;
}
break;
case 'E':
switch(*(c+1)) {
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'u':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_E,1,not_flag);
atom_seen = true;
break;
}
break;
case 'F':
switch(*(c+1)) {
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Fe,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_F,1,not_flag);
atom_seen = true;
break;
}
break;
case 'G':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'd':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
}
break;
case 'H':
switch(*(c+1)) {
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'f':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'g':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'o':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
if(!atom_seen) {
c = ChampParseBlockAtom(I,c,cur_atom,cH_H,1,not_flag);
atom_seen = true;
} else {
num = ChampParseNumeral(c+1);
if(num>=0) {
c+=2;
} else {
num = 1;
c++;
while(*c) {
if(*c!='H')
break;
else {
num++;
c++;
}
}
}
I->Atom[cur_atom].imp_hydro = num;
I->Atom[cur_atom].tot_hydro = num;
I->Atom[cur_atom].hydro_flag = true;
/* turn on hydrogen count matching for this atom */
}
break;
}
break;
case 'I':
switch(*(c+1)) {
case 'n':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_I,1,not_flag);
atom_seen = true;
break;
}
break;
case 'J':
c = ChampParseBlockAtom(I,c,cur_atom,cH_J,1,not_flag);
atom_seen = true;
break;
case 'K':
c = ChampParseBlockAtom(I,c,cur_atom,cH_K,1,not_flag);
atom_seen = true;
break;
case 'L':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'i':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'u':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_L,1,not_flag);
atom_seen = true;
}
break;
case 'M':
switch(*(c+1)) {
case 'n':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'o':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'g':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Mg,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_M,1,not_flag);
atom_seen = true;
break;
}
break;
case 'N':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Na,2,not_flag);
atom_seen = true;
break;
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'd':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'i':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_N,1,not_flag);
atom_seen = true;
break;
}
break;
case 'O':
switch(*(c+1)) {
case 's':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_O,1,not_flag);
atom_seen = true;
break;
}
break;
case 'P':
switch(*(c+1)) {
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'd':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'o':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 't':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_P,1,not_flag);
atom_seen = true;
}
break;
case 'p': /* Pi system */
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_class|=cH_Pi;
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].class|=cH_Pi;
}
c++;
break;
case 'R':
switch(*(c+1)) {
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'h':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'u':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_R,1,not_flag);
atom_seen = true;
break;
}
break;
case 'r':
num = ChampParseNumeral(c+1);
if(num>=0) {
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_cycle|=num_to_ring[num];
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].cycle|=num_to_ring[num];
}
c+=2;
} else {
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_cycle|=cH_Ring3|cH_Ring4|cH_Ring5|cH_Ring6|cH_Ring7|cH_Ring8;
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].cycle|=cH_Ring3|cH_Ring4|cH_Ring5|cH_Ring6|cH_Ring7|cH_Ring8;
}
c++;
}
break;
case 'Q':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Q,1,not_flag);
atom_seen = true;
break;
case 'S':
switch(*(c+1)) {
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'c':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Se,2,not_flag);
atom_seen = true;
break;
case 'i':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'm':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'n':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_S,1,not_flag);
atom_seen = true;
break;
}
break;
case 'T':
switch(*(c+1)) {
case 'a':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'e':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'i':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'h':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'l':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'm':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_T,1,not_flag);
atom_seen = true;
}
break;
case 'V':
switch(*(c+1)) {
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,1,not_flag);
atom_seen = true;
}
break;
case 'v':
num = ChampParseNumeral(c+1);
if(num>=0) {
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
I->Atom[cur_atom].not_valence|=num_to_valence[num];
} else {
I->Atom[cur_atom].pos_flag=true;
I->Atom[cur_atom].degree|=num_to_valence[num];
}
c+=2;
} else
ok=false;
break;
case 'W':
switch(*(c+1)) {
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,1,not_flag);
atom_seen = true;
break;
}
break;
case 'U':
switch(*(c+1)) {
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,1,not_flag);
atom_seen = true;
break;
}
break;
case 'Y':
switch(*(c+1)) {
case 'b':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,1,not_flag);
atom_seen = true;
break;
}
break;
case 'Z':
switch(*(c+1)) {
case 'r':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Sym,2,not_flag);
atom_seen = true;
break;
case 'n':
c = ChampParseBlockAtom(I,c,cur_atom,cH_Zn,2,not_flag);
atom_seen = true;
break;
default:
c = ChampParseBlockAtom(I,c,cur_atom,cH_Z,1,not_flag);
atom_seen = true;
break;
}
break;
case '+':
num = ChampParseNumeral(c+1);
if(num>=0) {
c+=2;
} else {
num = 1;
c++;
while(*c) {
if(*c!='+')
break;
else {
num++;
c++;
}
}
}
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
switch(num) {
case 0: I->Atom[cur_atom].not_charge|=cH_Neutral; break;
case 1: I->Atom[cur_atom].not_charge|=cH_Cation; break;
case 2: I->Atom[cur_atom].not_charge|=cH_Dication; break;
case 3: I->Atom[cur_atom].not_charge|=cH_Trication; break;
case 4: I->Atom[cur_atom].not_charge|=cH_Tetcation; break;
case 5: I->Atom[cur_atom].not_charge|=cH_Pentcation; break;
}
} else {
I->Atom[cur_atom].pos_flag=true;
switch(num) {
case 0: I->Atom[cur_atom].charge|=cH_Neutral; break;
case 1: I->Atom[cur_atom].charge|=cH_Cation; break;
case 2: I->Atom[cur_atom].charge|=cH_Dication; break;
case 3: I->Atom[cur_atom].charge|=cH_Trication; break;
case 4: I->Atom[cur_atom].charge|=cH_Tetcation; break;
case 5: I->Atom[cur_atom].charge|=cH_Pentcation; break;
}
}
break;
case '-':
num = ChampParseNumeral(c+1);
if(num>=0) {
c+=2;
} else {
num = 1;
c++;
while(*c) {
if(*c!='-')
break;
else {
num++;
c++;
}
}
}
if(not_flag) {
I->Atom[cur_atom].neg_flag=true;
switch(num) {
case 0: I->Atom[cur_atom].not_charge|=cH_Neutral; break;
case 1: I->Atom[cur_atom].not_charge|=cH_Anion; break;
case 2: I->Atom[cur_atom].not_charge|=cH_Dianion; break;
case 3: I->Atom[cur_atom].not_charge|=cH_Trianion; break;
case 4: I->Atom[cur_atom].not_charge|=cH_Tetanion; break;
case 5: I->Atom[cur_atom].not_charge|=cH_Pentanion; break;
}
} else {
I->Atom[cur_atom].pos_flag=true;
switch(num) {
case 0: I->Atom[cur_atom].charge|=cH_Neutral; break;
case 1: I->Atom[cur_atom].charge|=cH_Anion; break;
case 2: I->Atom[cur_atom].charge|=cH_Dianion; break;
case 3: I->Atom[cur_atom].charge|=cH_Trianion; break;
case 4: I->Atom[cur_atom].charge|=cH_Tetanion; break;
case 5: I->Atom[cur_atom].charge|=cH_Pentanion; break;
}
}
break;
default:
PRINTFB(FB_smiles_parsing,FB_errors)
" champ: error parsing atom block at '%c' in: '%s'\n",*c,*c_ptr
ENDFB;
c++;
break;
}
}
*c_ptr = c;
return ok;
}
#define cSym_Null 0
#define cSym_Atom 1
#define cSym_Bond 2
#define cSym_OpenScope 3
#define cSym_CloseScope 4
#define cSym_Mark 5
#define cSym_OpenBlock 6
#define cSym_CloseBlock 7
#define cSym_Separator 8
#define cSym_Qualifier 9
char *ChampPatToSmiVLA(CChamp *I,int index,char *vla,int mode)
{
char *result;
int n_atom;
int cur_scope = 0;
int cur_atom = 0;
int cur_bond = 0;
int a,c,l;
int mark[MAX_RING];
int mark_bond[MAX_RING];
int i;
int left_to_do = 0;
int next_mark =1;
int start_atom = 0;
AtomBuffer buf;
ListAtom *at1;
ListBond *bd1;
ListScope *scp1,*scp2;
#define concat_s(s,c) {vla_check(result,char,l+(c)+1);strcpy(result+l,(s));l+=(c);}
#define concat_c(s) {vla_check(result,char,l+2);result[l]=(s);result[l+1]=0;l++;}
for(a=0;a<MAX_RING;a++) {
mark[a]=0;
}
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
cur_atom = at1->link;
}
/*guestimate size requirements, and allocate */
if(!vla)
vla_malloc(result,char,n_atom*4);
else
result = vla;
result[0]=0;
l = 0;
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
if(l) concat_c('.')
cur_scope = ListElemNewZero(&I->Scope);
I->Scope[cur_scope].atom = start_atom;
I->Scope[cur_scope].bond = -1; /* signals start in new scope */
while(cur_scope) {
scp1 = I->Scope + cur_scope;
cur_atom = scp1->atom;
at1=I->Atom + cur_atom;
PRINTFD(FB_smiles_creation)
" SmiToStrVLA: scope %d cur_atom %d base_bond %d\n",cur_scope,cur_atom,scp1->base_bond
ENDFD;
if(scp1->bond<0) { /* starting new scope, so print atom and continue */
/* print bond, if required */
if(scp1->base_bond) {
c = ChampBondToString(I,scp1->base_bond,buf);
concat_s(buf,c);
}
/* write atom string */
at1->mark_tmpl = 1;
c = ChampAtomToString(I,cur_atom,buf);
concat_s(buf,c);
/* sprintf(buf,":%d:",cur_atom);
concat_s(buf,strlen(buf));*/
switch(mode) {
case 0:
break;
case 1:
concat_s("<",1);
concat_s(at1->name,strlen(at1->name));
concat_s(">",1);
break;
}
/* write opening marks */
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
/* write cycle indicator if necessary */
if(bd1->atom[0]!=cur_atom) {/* opposite direction -> explicit cycle */
if(!I->Atom[bd1->atom[0]].mark_tmpl)
{
if (next_mark >= MAX_RING) {
printf("DEBUG: %s next_mark %d\n", __FUNCTION__, next_mark);
break;
}
if(mark[next_mark]) {
for(index=0;index<9;index++) {
if(!mark[index]) break;
}
} else {
index = next_mark++;
}
if(index<MAX_RING) {
mark[index]=bd1->atom[0]; /* save for matching other end of cycle */
mark_bond[index]=cur_bond;
c = ChampBondToString(I,cur_bond,buf);
concat_s(buf,c);
if(index<10) {
concat_c('0'+index);
} else {
sprintf(buf,"%%%d",index);
concat_s(buf,strlen(buf));
}
}
}
}
}
/* write closing marks */
for(index=0;index<MAX_RING;index++) {
if(mark[index]==cur_atom) {
c = ChampBondToString(I,mark_bond[index],buf);
concat_s(buf,c);
if(index<10) {
concat_c('0'+index);
} else {
sprintf(buf,"%%%d",index);
concat_s(buf,strlen(buf));
}
mark[index]=0;
}
}
}
/* increment bond index index counter */
scp1->bond++;
/* now determine whether or not we need to create a new scope,
and figure out which bond to work on */
i = scp1->bond;
left_to_do = 0;
cur_bond = 0;
while(i<MAX_BOND) {
if(!at1->bond[i]) break;
bd1 = I->Bond + at1->bond[i];
if(bd1->atom[0]==cur_atom) {
if(!I->Atom[bd1->atom[1]].mark_tmpl) { /* not yet complete */
if(!cur_bond) cur_bond = at1->bond[i];
left_to_do++;
}
}
i++;
}
PRINTFD(FB_smiles_creation)
" SmiToStrVLA: cur_atom %d left to do %d cur_bond %d\n",cur_atom,left_to_do,cur_bond
ENDFD;
if(left_to_do>1) { /* yes, we need a new scope */
cur_scope = ListElemPush(&I->Scope,cur_scope);
scp2 = I->Scope + cur_scope;
scp2->base_bond = cur_bond;
scp2->atom = I->Bond[cur_bond].atom[1];
scp2->bond = -1;
concat_c('(');
scp2->paren_flag=true;
PRINTFD(FB_smiles_creation)
" SmiToStrVLA: creating new scope vs old %d\n",cur_scope
ENDFD;
} else if(left_to_do) { /* no we do not, so just extend current scope */
scp1->atom = I->Bond[cur_bond].atom[1];
scp1->base_bond = cur_bond;
scp1->bond = -1;
PRINTFD(FB_smiles_creation)
" SmiToStrVLA: extending scope\n"
ENDFD;
} else { /* nothing attached, so just close scope */
if(scp1->paren_flag)
concat_c(')');
cur_scope = ListElemPop(I->Scope,cur_scope);
PRINTFD(FB_smiles_creation)
" SmiToStrVLA: closing scope\n"
ENDFD;
}
}
}
start_atom = I->Atom[start_atom].link;
}
/* trim memory usage */
vla_set_size(result,char,strlen(result)+1);
return result;
}
static void ChampReassignLexPri(CChamp *I,int index)
{
/* reassigns lexical priorities based on the current tree
NOTE: unless stereo information is stored elsewhere, this trashes it
*/
int n_atom;
int cur_scope = 0;
int cur_atom = 0;
int cur_bond = 0;
int a;
int mark[MAX_RING];
int mark_bond[MAX_RING];
int i;
int left_to_do = 0;
int next_mark =1;
int start_atom = 0;
ListAtom *at1;
ListBond *bd1;
ListScope *scp1,*scp2;
int lex_pri = 0;
for(a=0;a<MAX_RING;a++) {
mark[a]=0;
}
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
cur_atom = at1->link;
}
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
lex_pri++;
cur_scope = ListElemNewZero(&I->Scope);
I->Scope[cur_scope].atom = start_atom;
I->Scope[cur_scope].bond = -1; /* signals start in new scope */
while(cur_scope) {
scp1 = I->Scope + cur_scope;
cur_atom = scp1->atom;
at1=I->Atom + cur_atom;
if(scp1->bond<0) { /* starting new scope, so print atom and continue */
/* print bond, if required */
if(scp1->base_bond) {
bd1 = I->Bond+scp1->base_bond;
lex_pri++;
bd1->pri[0] = lex_pri;
bd1->pri[1] = lex_pri;
}
/* write atom string */
at1->mark_tmpl = 1;
lex_pri ++;
/* write opening marks */
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
/* write cycle indicator if necessary */
if(bd1->atom[0]!=cur_atom) {/* opposite direction -> explicit cycle */
if(!I->Atom[bd1->atom[0]].mark_tmpl)
{
if (next_mark >= MAX_RING) {
// observed with "2xwu > A > generate > vacuum electrostatics"
printf("DEBUG: %s next_mark %d\n", __FUNCTION__, next_mark);
break;
}
if(mark[next_mark]) {
for(index=0;index<9;index++) {
if(!mark[index]) break;
}
} else {
index = next_mark++;
}
if(index<MAX_RING) {
mark[index]=bd1->atom[0]; /* save for matching other end of cycle */
mark_bond[index]=cur_bond;
lex_pri++;
bd1->pri[1] = lex_pri; /* from this atom's point of view, this is the priority location*/
}
}
}
}
/* write closing marks */
for(index=0;index<MAX_RING;index++) {
if(mark[index]==cur_atom) {
lex_pri++;
bd1 = I->Bond + mark_bond[index];
bd1->pri[0] = lex_pri;
mark[index]=0;
}
}
}
/* increment bond index index counter */
scp1->bond++;
/* now determine whether or not we need to create a new scope,
and figure out which bond to work on */
i = scp1->bond;
left_to_do = 0;
cur_bond = 0;
while(i<MAX_BOND) {
if(!at1->bond[i]) break;
bd1 = I->Bond + at1->bond[i];
if(bd1->atom[0]==cur_atom) {
if(!I->Atom[bd1->atom[1]].mark_tmpl) { /* not yet complete */
if(!cur_bond) cur_bond = at1->bond[i];
left_to_do++;
}
}
i++;
}
if(left_to_do>1) { /* yes, we need a new scope */
cur_scope = ListElemPush(&I->Scope,cur_scope);
scp2 = I->Scope + cur_scope;
scp2->base_bond = cur_bond;
scp2->atom = I->Bond[cur_bond].atom[1];
scp2->bond = -1;
lex_pri++;
scp2->paren_flag=true;
} else if(left_to_do) { /* no we do not, so just extend current scope */
scp1->atom = I->Bond[cur_bond].atom[1];
scp1->base_bond = cur_bond;
scp1->bond = -1;
} else { /* nothing attached, so just close scope */
if(scp1->paren_flag)
lex_pri++;
cur_scope = ListElemPop(I->Scope,cur_scope);
}
}
}
start_atom = I->Atom[start_atom].link;
}
}
void ChampGeneralize(CChamp *I,int index)
{
ListBond *bd1;
int cur_bond = 0;
ChampPrepareTarget(I,index);
cur_bond=I->Pat[index].bond;
while(cur_bond) {
bd1 = I->Bond + cur_bond;
if(bd1->class&cH_Aromatic) {
bd1->order=cH_NoOrder; /* strip specific bond order
* from aromatic bonds */
bd1->class=cH_Pi; /* make all bonds Pi bonds */
}
cur_bond = bd1->link;
}
}
static void ChampStereoToInternal(CChamp *I, int index)
{
{
int n_atom;
int cur_atom = 0;
int cur_bond = 0;
int start_atom = 0;
int a;
int order_handedness;
int pri_handedness;
ListAtom *at1;
ListBond *bd1;
int n_bond;
int ati[MAX_BOND],pri[MAX_BOND];
/* first we need to sweep the molecule in order and assign lexical
priorities to bonds based on when atoms are written out */
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
at1->stereo_internal = 0; /* clear stereo orientation */
cur_atom = at1->link;
}
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
cur_atom = start_atom;
at1=I->Atom + cur_atom;
at1->mark_tmpl = 1;
if(at1->stereo) {
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
n_bond++;
}
if(n_bond==4) {
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
if(bd1->atom[0]==cur_atom) {
pri[n_bond] = bd1->pri[0];
ati[n_bond] = bd1->atom[1];
} else {
pri[n_bond] = bd1->pri[1];
ati[n_bond] = bd1->atom[0];
}
n_bond++;
}
{ /* get coordinates into absolute atom order */
int idx[MAX_BOND];
SortIntIndex(4,pri,idx);
pri_handedness = ChiralHandedness(idx);
SortIntIndex(4,ati,idx);
order_handedness = ChiralHandedness(idx);
}
if(pri_handedness == order_handedness)
at1->stereo_internal = at1->stereo;
else
at1->stereo_internal = -at1->stereo;
}
}
start_atom = I->Atom[start_atom].link;
}
}
}
}
static void ChampStereoFromInternal(CChamp *I, int index)
{
{
int n_atom;
int cur_atom = 0;
int cur_bond = 0;
int start_atom = 0;
int pri_handedness,order_handedness;
int a;
ListAtom *at1;
ListBond *bd1;
int n_bond;
int ati[MAX_BOND],pri[MAX_BOND];
/* first we need to sweep the molecule in order and assign lexical
priorities to bonds based on when atoms are written out */
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
at1->stereo = 0; /* clear stereo orientation */
cur_atom = at1->link;
}
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
cur_atom = start_atom;
at1=I->Atom + cur_atom;
at1->mark_tmpl = 1;
if(at1->stereo_internal) {
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
n_bond++;
}
if(n_bond==4) {
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
if(bd1->atom[0]==cur_atom) {
pri[n_bond] = bd1->pri[0];
ati[n_bond] = bd1->atom[1];
} else {
pri[n_bond] = bd1->pri[1];
ati[n_bond] = bd1->atom[0];
}
n_bond++;
}
{ /* get coordinates into absolute atom order */
int idx[MAX_BOND];
SortIntIndex(4,pri,idx);
pri_handedness = ChiralHandedness(idx);
SortIntIndex(4,ati,idx);
order_handedness = ChiralHandedness(idx);
}
if(pri_handedness==order_handedness)
at1->stereo = at1->stereo_internal;
else
at1->stereo = -at1->stereo_internal;
}
}
start_atom = I->Atom[start_atom].link;
}
}
}
}
void ChampOrientBonds(CChamp *I,int index)
/* This destroys stereo information...right? */
{
/* do a prepatory walk through the molecule to figure out how to minimize
explicit connections */
int n_atom;
int cur_scope = 0;
int cur_atom = 0;
int cur_bond = 0;
int a,tmp;
int i;
int left_to_do = 0;
int start_atom = 0;
int last_atom = 0;
ListAtom *at1;
ListBond *bd1;
ListScope *scp1,*scp2;
ChampStereoToInternal(I,index);
/* convert stereo information into bond-order-independent internal representation */
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
cur_atom = at1->link;
}
cur_bond=I->Pat[index].bond; /* clear markings... */
while(cur_bond) {
bd1 = I->Bond + cur_bond;
bd1->mark_tmpl = 0;
cur_bond = bd1->link;
}
/* make sure that the start atom is not stereo-specified */
last_atom = 0;
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].stereo)
break;
last_atom = start_atom;
start_atom = I->Atom[start_atom].link;
}
if(last_atom&&start_atom) { /* start atom is stereo, so we've got to change things */
int old_first_atom = I->Pat[index].atom;
I->Pat[index].atom = start_atom;
I->Atom[last_atom].link = I->Atom[start_atom].link; /* excise this atom from the list */
I->Atom[start_atom].link = old_first_atom;
}
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
cur_scope = ListElemNewZero(&I->Scope);
I->Scope[cur_scope].atom = start_atom;
I->Scope[cur_scope].bond = -1; /* signals start in new scope */
while(cur_scope) {
scp1 = I->Scope + cur_scope;
cur_atom = scp1->atom;
at1=I->Atom + cur_atom;
if(scp1->bond<0) { /* starting new scope, so print atom and continue */
/* mark atom */
at1->mark_tmpl = 1;
/* reorder atom's bonds (if necessary) */
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
if(!bd1->mark_tmpl) { /* is this the first time we've seen this bond? */
bd1->mark_tmpl=1;
if(bd1->atom[0]!=cur_atom) { /* reorient bond to mimize explicit cycles... */
tmp=bd1->atom[0];
bd1->atom[0]=bd1->atom[1];
bd1->atom[1]=tmp;
}
} else { /* hmmm...not the first time...so how did we get here? */
if(bd1->atom[0]!=scp1->base_atom) { /* not this route so... */
tmp=bd1->atom[0]; /* reorient bond to capture explicit cycle */
bd1->atom[0]=bd1->atom[1];
bd1->atom[1]=tmp;
}
}
}
}
/* increment bond index index counter */
scp1->bond++;
/* now determine whether or not we need to create a new scope,
and figure out which bond to work on */
i = scp1->bond;
left_to_do = 0;
cur_bond = 0;
while(i<MAX_BOND) {
if(!at1->bond[i]) break;
bd1 = I->Bond + at1->bond[i];
if(!bd1->mark_tmpl) { /* is this the first time we've seen this bond? */
bd1->mark_tmpl=1;
if(bd1->atom[0]!=cur_atom) { /* reorient bond to mimize explicit cycles... */
tmp=bd1->atom[0];
bd1->atom[0]=bd1->atom[1];
bd1->atom[1]=tmp;
}
}
if(bd1->atom[0]==cur_atom) {
if(!I->Atom[bd1->atom[1]].mark_tmpl) { /* not yet complete */
if(!cur_bond) cur_bond = at1->bond[i];
left_to_do++;
}
}
i++;
}
if(left_to_do>1) { /* yes, we need a new scope */
cur_scope = ListElemPush(&I->Scope,cur_scope);
scp2 = I->Scope + cur_scope;
scp2->base_bond = cur_bond;
scp2->atom = I->Bond[cur_bond].atom[1];
scp2->base_atom = cur_atom;
scp2->bond = -1;
} else if(left_to_do) { /* no we do not, so just extend current scope */
scp1->atom = I->Bond[cur_bond].atom[1];
scp1->base_bond = cur_bond;
scp1->base_atom = cur_atom;
scp1->bond = -1;
} else { /* nothing attached, so just close scope */
cur_scope = ListElemPop(I->Scope,cur_scope);
}
}
}
start_atom = I->Atom[start_atom].link;
}
ChampReassignLexPri(I,index);
ChampStereoFromInternal(I,index);
}
#define R_SMALL 0.0000001F
static double sqrt1f(float f) { /* no good as a macro because f is used twice */
if(f>0.0F)
return(sqrt(f));
else
return 0.0;
}
static double length3f ( float *v1 )
{
return(sqrt1f((v1[0]*v1[0]) +
(v1[1]*v1[1]) +
(v1[2]*v1[2])));
}
static void normalize3f( float *v1 )
{
double vlen = length3f(v1);
if(vlen > R_SMALL)
{
float inV = (float)(1.0F / vlen);
v1[0] *= inV;
v1[1] *= inV;
v1[2] *= inV;
}
else
{
v1[0]=v1[1]=v1[2]=0.0F;
}
}
static void remove_component3f ( float *v1, float *unit, float *result)
{
float dot;
dot = v1[0]*unit[0] + v1[1]*unit[1] + v1[2]*unit[2];
result[0]=v1[0]-unit[0]*dot;
result[1]=v1[1]-unit[1]*dot;
result[2]=v1[2]-unit[2]*dot;
}
static float dot_product3f ( float *v1, float *v2 )
{
return( v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]);
}
static void subtract3f ( float *v1, float *v2, float *v3 )
{
v3[0]=v1[0]-v2[0];
v3[1]=v1[1]-v2[1];
v3[2]=v1[2]-v2[2];
}
static void cross_product3f ( float *v1, float *v2, float *cross )
{
cross[0] = (v1[1]*v2[2]) - (v1[2]*v2[1]);
cross[1] = (v1[2]*v2[0]) - (v1[0]*v2[2]);
cross[2] = (v1[0]*v2[1]) - (v1[1]*v2[0]);
}
void ChampDetectChirality(CChamp *I,int index)
{
ChampReassignLexPri(I,index);
/* ChampPatDump(I,index);*/
/* now, find the chiral atoms and geometrically determine their handedness
based on their coordinates */
{
int n_atom;
int cur_atom = 0;
int cur_bond = 0;
int start_atom = 0;
int a;
ListAtom *at1;
ListBond *bd1;
int pri[MAX_BOND];
int n_bond;
int ati[MAX_BOND];
float *vc, *v[MAX_BOND],vd[MAX_BOND][3],vr[MAX_BOND][3],vt[3];
/* first we need to sweep the molecule in order and assign lexical
priorities to bonds based on when atoms are written out */
cur_atom=I->Pat[index].atom;
n_atom=0;
while(cur_atom) {
n_atom++;
at1 = I->Atom + cur_atom;
at1->mark_tmpl = 0;
at1->stereo = 0; /* clear stereo orientation */
cur_atom = at1->link;
}
start_atom = I->Pat[index].atom;
while(start_atom) {
if(!I->Atom[start_atom].mark_tmpl) {
cur_atom = start_atom;
at1=I->Atom + cur_atom;
/* printf("name: %s\n",at1->name);*/
at1->mark_tmpl = 1;
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
n_bond++;
}
if(n_bond==4) {
/* get the relative lexical priorities for each bond */
n_bond = 0;
for(a=0;a<MAX_BOND;a++) {
cur_bond = at1->bond[a];
if(!cur_bond) break;
bd1 = I->Bond+cur_bond;
if(bd1->atom[0]==cur_atom) {
pri[n_bond] = bd1->pri[0];
ati[n_bond] = bd1->atom[1];
} else {
pri[n_bond] = bd1->pri[1];
ati[n_bond] = bd1->atom[0];
}
n_bond++;
}
{ /* get coordinates into lexical rotational order */
int idx[MAX_BOND];
SortIntIndex(4,pri,idx);
/*
printf("pri: %d %d %d %d\n",pri[0],pri[1],pri[2],pri[3]);
printf("idx: %d %d %d %d\n",idx[0],idx[1],idx[2],idx[3]);
printf("ord: %d %d %d %d\n",pri[idx[0]],pri[idx[1]],pri[idx[2]],pri[idx[3]]);
printf("%s %s %s %s\n",
I->Atom[ati[idx[0]]].name,
I->Atom[ati[idx[1]]].name,
I->Atom[ati[idx[2]]].name,
I->Atom[ati[idx[3]]].name);
*/
v[0]=I->Atom[ati[idx[0]]].coord;
v[1]=I->Atom[ati[idx[1]]].coord;
v[2]=I->Atom[ati[idx[2]]].coord;
v[3]=I->Atom[ati[idx[3]]].coord;
}
vc = at1->coord;
subtract3f(v[0],vc,vd[0]);
subtract3f(v[1],vc,vd[1]);
subtract3f(v[2],vc,vd[2]);
subtract3f(v[3],vc,vd[3]);
normalize3f(vd[0]);
remove_component3f(vd[1],vd[0],vr[1]);
remove_component3f(vd[2],vd[0],vr[2]);
remove_component3f(vd[3],vd[0],vr[3]);
cross_product3f(vr[1],vr[2],vt);
normalize3f(vt);
/* printf("%s %8.3f\n",at1->name,dot_product3f(vd[0],vt));*/
if(dot_product3f(vd[0],vt)>0.0F) {
at1->stereo = cH_Anticlock;
} else {
at1->stereo = cH_Clockwise;
}
}
start_atom = I->Atom[start_atom].link;
}
}
}
}
void ChampAtomDump(CChamp *I,int index)
{
char buf[3];
ChampAtomToString(I,index,buf);
printf("%s",buf);
}
void ChampPatDump(CChamp *I,int index)
{
int cur_atom;
int cur_bond;
int a;
AtomBuffer buf;
ListAtom *at;
ListBond *bd;
cur_atom = I->Pat[index].atom;
while(cur_atom) {
at = I->Atom+cur_atom;
ChampAtomToString(I,cur_atom,buf);
printf(" atom %d %3s 0x%08x nam: %s res: %s sym: %s\n",cur_atom,buf,at->atom,at->name,at->residue,at->symbol);
printf(" cy: %x",at->cycle);
printf(" cl: %x v: %02x D: %02x ch: %02x cy: %d st: %d ih: %d hy: %d hf: %d bo: ",
at->class,at->valence,at->degree,at->charge,at->cycle,
at->stereo,at->imp_hydro,at->tot_hydro,at->hydro_flag);
for(a=0;a<MAX_BOND;a++) {
if(!at->bond[a]) break;
else printf("%d ",at->bond[a]);
}
printf("\n");
printf(" pf: %d nf: %d !at %d !ch %d !cy %d !cl %d !D %d !v %d\n",
at->pos_flag,at->neg_flag,at->not_atom,
at->not_charge,at->not_cycle,at->not_class,
at->not_degree,at->not_valence);
cur_atom = I->Atom[cur_atom].link;
}
cur_bond = I->Pat[index].bond;
while(cur_bond) {
bd = I->Bond+cur_bond;
printf(" bond %d 0x%01x atoms %d %d order 0x%01x cycle %x dir %d class %x pri: %d %d\n",
cur_bond,bd->order,bd->atom[0],bd->atom[1],bd->order,bd->cycle,
bd->direction,bd->class,bd->pri[0],bd->pri[1]);
cur_bond = I->Bond[cur_bond].link;
}
fflush(stdout);
}
int ChampBondToString(CChamp *I,int index,char *buf)
{
ListBond *bd;
if(index) {
bd=I->Bond + index;
switch(bd->order) {
case cH_Single: buf[0]=0; break;
case cH_Double: buf[0]='='; buf[1]=0; break;
case cH_Triple: buf[0]='#'; buf[1]=0; break;
}
} else
buf[0]=0;
return(strlen(buf));
}
int ChampAtomToString(CChamp *I,int index,char *buf)
{
/* first, determine whether this is a trivial or block atom */
int c;
int mask;
int a;
int trivial=true;
ListAtom *at;
buf[0]=0;
if(index) {
at=I->Atom + index;
/* the following are non-trivial */
trivial = trivial && (!at->neg_flag) && (!at->hydro_flag);
trivial = trivial && !(at->charge&(cH_Cation|cH_Dication|
cH_Anion|cH_Dianion|
cH_Trication|cH_Trianion|
cH_Tetcation|cH_Tetanion|
cH_Pentcation|cH_Pentanion
));
trivial = trivial && !(at->cycle|at->valence|at->degree);
trivial = trivial && !(at->class&cH_Aliphatic);
trivial = trivial && !((at->atom!=cH_Any) &&
at->atom&( cH_Na | cH_K | cH_Ca | cH_Mg |
cH_Zn | cH_Fe | cH_Cu | cH_Se |
cH_A | cH_E | cH_G | cH_J |
cH_L | cH_M | cH_Q | cH_R |
cH_T | cH_X | cH_Z ));
trivial = trivial && (at->stereo==0);
if(trivial&&(at->atom!=cH_Any)) {
/* check number of atoms represented */
c = 0;
mask = 1;
for(a=0;a<32;a++) {
if(mask&at->atom) {
c++;
if(c>1) {
trivial=false;
break;
}
}
mask=mask*2;
}
}
if(trivial) {
if(at->class&cH_Aromatic) {
switch(at->atom) {
case cH_C: buf[0]='c'; buf[1]=0; break;
case cH_N: buf[0]='n'; buf[1]=0; break;
case cH_O: buf[0]='o'; buf[1]=0; break;
case cH_S: buf[0]='s'; buf[1]=0; break;
default:
trivial=false; break;
}
} else {
switch(at->atom) {
case cH_Any: buf[0]='*'; buf[1]=0; break;
case cH_NotH: buf[0]='?'; buf[1]=0; break;
case cH_B: buf[0]='B'; buf[1]=0; break;
case cH_C: buf[0]='C'; buf[1]=0; break;
case cH_N: buf[0]='N'; buf[1]=0; break;
case cH_O: buf[0]='O'; buf[1]=0; break;
/* case cH_H: buf[0]='H'; buf[1]=0; break;*/
case cH_H: strcpy(buf,"[H]"); break;
case cH_S: buf[0]='S'; buf[1]=0; break;
case cH_P: buf[0]='P'; buf[1]=0; break;
case cH_F: buf[0]='F'; buf[1]=0; break;
case cH_Cl: buf[0]='C'; buf[1]='l'; buf[2]=0; break;
case cH_Br: buf[0]='B'; buf[1]='r'; buf[2]=0; break;
case cH_I: buf[0]='I'; buf[1]=0; break;
default:
trivial = false; break;
}
}
}
if(!trivial) {
strcat(buf,"[");
if(at->atom==cH_Sym) {
strcat(buf,at->symbol);
} else {
switch(at->atom) {
case cH_Any: strcat(buf,"*"); break;
case cH_NotH: strcat(buf,"?"); break;
case cH_B: strcat(buf,"B"); break;
case cH_C: strcat(buf,"C"); break;
case cH_N: strcat(buf,"N"); break;
case cH_O: strcat(buf,"O"); break;
case cH_H: strcat(buf,"H"); break;
case cH_S: strcat(buf,"S"); break;
case cH_P: strcat(buf,"P"); break;
case cH_F: strcat(buf,"F"); break;
case cH_Cl: strcat(buf,"Cl"); break;
case cH_Br: strcat(buf,"Br"); break;
case cH_I: strcat(buf,"I"); break;
case cH_Na: strcat(buf,"Na"); break;
case cH_K: strcat(buf,"K"); break;
case cH_Ca: strcat(buf,"Ca"); break;
case cH_Mg: strcat(buf,"Mg"); break;
case cH_Fe: strcat(buf,"Fe"); break;
case cH_Zn: strcat(buf,"Zn"); break;
case cH_Cu: strcat(buf,"Cu"); break;
case cH_Se: strcat(buf,"Se"); break;
case cH_A: strcat(buf,"A"); break;
case cH_E: strcat(buf,"E"); break;
case cH_G: strcat(buf,"G"); break;
case cH_J: strcat(buf,"J"); break;
case cH_L: strcat(buf,"L"); break;
case cH_M: strcat(buf,"M"); break;
case cH_Q: strcat(buf,"Q"); break;
case cH_R: strcat(buf,"R"); break;
case cH_T: strcat(buf,"T"); break;
case cH_X: strcat(buf,"X"); break;
case cH_Z: strcat(buf,"Z"); break;
default: sprintf(buf,"%x",at->atom); break;
}
}
switch(at->stereo) {
case cH_Anticlock:
strcat(buf,"@");
break;
case cH_Clockwise:
strcat(buf,"@@");
break;
}
if(at->imp_hydro) {
switch(at->imp_hydro) {
case 0:
break;
case 1:
strcat(buf,"H");
break;
case 2:
strcat(buf,"H2");
break;
case 3:
strcat(buf,"H3");
break;
case 4:
strcat(buf,"H4");
break;
}
}
if(at->charge) {
switch(at->charge) {
case cH_Cation:
strcat(buf,"+");
break;
case cH_Anion:
strcat(buf,"-");
break;
case cH_Dication:
strcat(buf,"++");
break;
case cH_Dianion:
strcat(buf,"--");
break;
case cH_Trication:
strcat(buf,"+3");
break;
case cH_Trianion:
strcat(buf,"-3");
break;
case cH_Tetcation:
strcat(buf,"+4");
break;
case cH_Tetanion:
strcat(buf,"-4");
break;
case cH_Pentcation:
strcat(buf,"+5");
break;
case cH_Pentanion:
strcat(buf,"-5");
break;
}
}
strcat(buf,"]");
}
} else
buf[0]=0;
return(strlen(buf));
}
int ChampAddBondToAtom(CChamp *I,int atom_index,int bond_index)
{
int i;
ListAtom *at1;
int ok=true;
at1 = I->Atom+atom_index;
i=0;
while(at1->bond[i]) i++; /* flawed */
if(i<MAX_BOND) {
at1->bond[i] = bond_index;
} else {
PRINTFB(FB_smiles_parsing,FB_errors)
" champ: MAX_BOND exceeded...\n"
ENDFB;
ok=false;
}
return(ok);
}
int ChampSmiToPat(CChamp *I,char *c)
{ /* returns root atom of list */
int mark[MAX_RING]; /* ring marks 0-9 */
int mark_pri[MAX_RING]; /* lexical priority of mark */
int stack = 0; /* parenthetical scopes */
int base_atom = 0;
int last_atom = 0;
int last_bond = 0;
int atom_list = 0;
int bond_list = 0;
int bond_flag = false;
int cur_atom = 0;
int cur_bond = 0;
int mark_code = 0;
int result = 0;
int sym;
int ok = true;
unsigned int bond_tags = 0;
unsigned int bond_not_tags = 0;
int a;
int not_bond = false;
int lex_pri = 0;
char *orig_c=c;
#define save_bond() { if(last_bond) {I->Bond[last_bond].link=cur_bond;}\
else {bond_list=cur_bond;}\
last_bond = cur_bond;\
cur_bond = ListElemNewZero(&I->Bond);}
#define save_atom() { if(last_atom) {I->Atom[last_atom].link=cur_atom;}\
else {atom_list=cur_atom;}\
last_atom = cur_atom;\
cur_atom = ListElemNewZero(&I->Atom);}
PRINTFD(FB_smiles_parsing)
" ChampSmiToPat: input '%s'\n",c
ENDFD;
for(a=0;a<MAX_RING;a++)
mark[a]=0;
cur_atom = ListElemNewZero(&I->Atom);
cur_bond = ListElemNewZero(&I->Bond);
lex_pri = 0;
while((*c)&&ok) {
lex_pri++;
PRINTFD(FB_smiles_parsing)
" parsing: '%c' at %p\n",*c,c
ENDFD;
sym = cSym_Null;
/* ============ ROOT LEVEL PARSTING ============ */
if(((*c)>='0')&&((*c)<='9')) {
sym = cSym_Mark;
mark_code = (*c)-'0';
c++;
} else {
switch(*c) {
/* standard, implicit atoms, with lowest normal valences
* B(3), C(4), N(3,5), O(2), P(3,5), S(2,4,6), F(1), Cl(1), Br(1), I(1) */
case 'C':
switch(*(c+1)) {
case 'l':
case 'L': /* be tolerate at the root level, but not withing blocks...*/
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Cl,2,false);
sym = cSym_Atom;
break;
default:
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_C,1,true);
sym = cSym_Atom;
PRINTFD(FB_smiles_parsing)
" parsed: %p\n",c
ENDFD;
break;
}
break;
case '<': /* tag index/list */
if(bond_flag) {
c = ChampParseTag(I,c,&bond_tags,&bond_not_tags,&ok);
} else {
if(base_atom) {
c = ChampParseTag(I,c,&I->Atom[base_atom].tag,
&I->Atom[base_atom].not_tag,&ok);
} else ok=false;
}
sym = cSym_Qualifier;
break;
case '*': /* nonstandard? */
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Any,1,false);
sym = cSym_Atom;
break;
case '?': /* nonstandard */
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_NotH,1,false);
sym = cSym_Atom;
break;
case 'H': /* nonstandard */
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_H,1,false);
sym = cSym_Atom;
break;
case 'N':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_N,1,true);
sym = cSym_Atom;
break;
case 'O':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_O,1,true);
sym = cSym_Atom;
break;
case 'B':
switch(*(c+1)) {
case 'r':
case 'R':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Br,2,true);
sym = cSym_Atom;
break;
default:
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_B,1,true);
sym = cSym_Atom;
break;
}
break;
case 'P':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_P,1,true);
sym = cSym_Atom;
break;
case 'S':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_S,1,true);
sym = cSym_Atom;
break;
case 'F':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_F,1,true);
sym = cSym_Atom;
break;
case 'I':
c = ChampParseAliphaticAtom(I,c,cur_atom,cH_I,1,true);
sym = cSym_Atom;
break;
/* standard implicit aromatic atoms */
case 'c':
c = ChampParseAromaticAtom(I,c,cur_atom,cH_C,1,true);
sym = cSym_Atom;
break;
case 'n':
c = ChampParseAromaticAtom(I,c,cur_atom,cH_N,1,true);
sym = cSym_Atom;
break;
case 'o':
c = ChampParseAromaticAtom(I,c,cur_atom,cH_O,1,true);
sym = cSym_Atom;
break;
case 's':
c = ChampParseAromaticAtom(I,c,cur_atom,cH_S,1,true);
sym = cSym_Atom;
break;
case ';':
c++;
not_bond=false;
sym = cSym_Qualifier;
break;
case ',':
c++;
sym = cSym_Qualifier;
break;
case '!':
c++;
not_bond=true;
sym = cSym_Qualifier;
break;
case '-':
c++;
if(not_bond)
I->Bond[cur_bond].not_order |= cH_Single;
else
I->Bond[cur_bond].order |= cH_Single;
sym = cSym_Bond;
break;
case '/':
c++;
if(not_bond)
I->Bond[cur_bond].not_order |= cH_Single;
else
I->Bond[cur_bond].order |= cH_Single;
sym = cSym_Bond;
I->Bond[cur_bond].direction = cH_Up;
break;
case '\\':
c++;
if(not_bond)
I->Bond[cur_bond].not_order |= cH_Single;
else
I->Bond[cur_bond].order |= cH_Single;
sym = cSym_Bond;
I->Bond[cur_bond].direction = cH_Down;
break;
case '=':
c++;
if(not_bond)
I->Bond[cur_bond].not_order |= cH_Double;
else
I->Bond[cur_bond].order |= cH_Double;
sym = cSym_Bond;
break;
case '#':
c++;
if(not_bond)
I->Bond[cur_bond].not_order |= cH_Triple;
else
I->Bond[cur_bond].order |= cH_Triple;
sym = cSym_Bond;
break;
case '~':
c++;
if(not_bond) {
I->Bond[cur_bond].not_order |= cH_AnyOrder;
I->Bond[cur_bond].not_class |= cH_AnyClass;
} else {
I->Bond[cur_bond].order |= cH_AnyOrder;
I->Bond[cur_bond].class |= cH_AnyClass;
}
sym = cSym_Bond;
break;
case '@':
c++;
if(not_bond)
I->Bond[cur_bond].not_cycle |= cH_Cyclic;
else
I->Bond[cur_bond].cycle |= cH_Cyclic;
sym = cSym_Bond;
break;
case ':':
c++;
if(not_bond)
I->Bond[cur_bond].not_class |= cH_Aromatic;
else
I->Bond[cur_bond].class |= cH_Aromatic;
sym = cSym_Bond;
break;
case '.': /* separator */
c++;
sym = cSym_Separator;
break;
case '%':
c++;
if(c) {
mark_code = 10*((*c)-'0');
c++;
} /* else error */
if(c) {
sym = cSym_Mark;
mark_code += (*c)-'0';
c++;
} /* else error */
break;
case '(':
c++;
sym = cSym_OpenScope;
break;
case ')':
c++;
sym = cSym_CloseScope;
break;
case '[':
c++;
sym = cSym_OpenBlock;
break;
case ']':
c++;
sym = cSym_CloseBlock;
break;
}
}
if(sym==cSym_Null) {
PRINTFB(FB_smiles_parsing,FB_errors)
" champ: error parsing smiles string at '%c' (char %zd) in\n champ: '%s'\n",*c,c-orig_c,orig_c
ENDFB;
ok=false;
}
if(ok) {
/* =========== actions based on root level parsing ========== */
switch(sym) {
case cSym_OpenBlock:
ok = ChampParseAtomBlock(I,&c,cur_atom);
case cSym_Atom:
/* was there a preceeding atom? if so, then form bond and save atom */
if(base_atom) {
PRINTFD(FB_smiles_parsing)
" ChampSmiToPtr: saving atom %d\n",last_atom
ENDFD;
/* backward link */
I->Bond[cur_bond].atom[0] = base_atom;
I->Bond[cur_bond].atom[1] = cur_atom;
I->Bond[cur_bond].pri[0] = lex_pri;
I->Bond[cur_bond].pri[1] = lex_pri;
if(!bond_flag) {
if((I->Atom[cur_atom].class&cH_Aromatic)&&
(I->Atom[base_atom].class&cH_Aromatic))
I->Bond[cur_bond].order = (cH_Single|cH_Aromatic); /* is this right? */
else
I->Bond[cur_bond].order = cH_Single;
}
I->Bond[cur_bond].tag = bond_tags; /* save bond tags */
I->Bond[cur_bond].not_tag = bond_not_tags; /* save bond tags */
bond_tags=0;
bond_not_tags=0;
ok = ChampAddBondToAtom(I,cur_atom,cur_bond);
if(ok) {
ok = ChampAddBondToAtom(I,base_atom,cur_bond);
save_bond();
}
bond_flag=false;
not_bond=false;
}
base_atom = cur_atom;
save_atom();
break;
case cSym_CloseBlock: /* should never be reached */
break;
case cSym_OpenScope: /* push base_atom onto stack */
stack = ListElemPushInt(&I->Int,stack,base_atom);
break;
case cSym_CloseScope:
if(!stack) {
PRINTFB(FB_smiles_parsing,FB_errors)
" champ: stack underflow for scope...\n"
ENDFB;
ok=false;
} else {
base_atom=I->Int[stack].value;
stack = ListElemPop(I->Int,stack);
}
break;
case cSym_Bond:
bond_flag=true;
break;
case cSym_Mark:
if(base_atom) {
if(!mark[mark_code]) { /* opening cycle */
mark[mark_code] = base_atom;
mark_pri[mark_code] = lex_pri;
bond_flag = false; /* ignore the first bond valence...we'll get it from the second half of the mark*/
not_bond = false;
} else { /* closing cycle */
I->Bond[cur_bond].atom[0] = base_atom;
I->Bond[cur_bond].atom[1] = mark[mark_code];
I->Bond[cur_bond].pri[0] = lex_pri;
I->Bond[cur_bond].pri[1] = mark_pri[mark_code];
if(!bond_flag) {
I->Bond[cur_bond].order = cH_Single;
}
ok = ChampAddBondToAtom(I,base_atom,cur_bond);
if(ok) {
ok = ChampAddBondToAtom(I,mark[mark_code],cur_bond);
save_bond();
}
mark[mark_code]=0;
bond_flag=false;
not_bond=false;
}
} else {
PRINTFB(FB_smiles_parsing,FB_errors)
" champ: syntax error...\n"
ENDFB;
ok = false;
}
break;
case cSym_Separator:
base_atom = 0;
break;
case cSym_Qualifier:
break;
}
}
}
if(ok&&atom_list) {
result = ListElemNewZero(&I->Pat);
if(result) {
I->ActivePatList = ListElemPushInt(&I->Int,I->ActivePatList,result);
I->Pat[result].atom = atom_list;
I->Pat[result].bond = bond_list;
} else
ok=false;
}
if(cur_atom) ChampAtomFree(I,cur_atom);
if(cur_bond) ChampBondFree(I,cur_bond);
if(result) ChampPatReindex(I,result);
PRINTFD(FB_smiles_parsing)
" ChampSmiToPtr: returning pattern %d atom_list %d bond_list %d\n",result,atom_list,bond_list
ENDFD;
return(result);
}
/* ===============================================================
* Matching
* =============================================================== */
int ChampPatIdentical(ListAtom *p,ListAtom *a)
{
if(p->pos_flag!=a->pos_flag)
return 0;
else {
if(p->pos_flag)
if((p->atom!=a->atom)||
(p->charge!=a->charge)||
(p->cycle!=a->cycle)||
(p->class!=a->class)||
(p->degree!=a->degree)||
(p->valence!=a->valence))
return 0;
}
if(p->neg_flag!=a->neg_flag)
return 0;
else {
if(p->neg_flag)
if((p->not_atom!=a->atom)||
(p->not_charge!=a->charge)||
(p->not_cycle!=a->cycle)||
(p->not_class!=a->class)||
(p->not_degree!=a->degree)||
(p->not_valence!=a->valence))
return 0;
}
return 1;
}
int ChampAtomMatch(ListAtom *p,ListAtom *a)
{
if((((!p->pos_flag)||
(((!p->atom)||(p->atom&a->atom))&&
((!p->charge)||(p->charge&a->charge))&&
((!p->cycle)||(p->cycle&a->cycle))&&
((!p->class)||(p->class&a->class))&&
((!p->degree)||(p->degree&a->degree))&&
((!p->valence)||(p->valence&a->valence))))&&
((!p->neg_flag)||
(((!p->not_atom)||(!(p->not_atom&a->atom)))&&
((!p->not_charge)||(!(p->not_charge&a->charge)))&&
((!p->not_cycle)||(!(p->not_cycle&a->cycle)))&&
((!p->not_class)||(!(p->not_class&a->class)))&&
((!p->not_degree)||(!(p->not_degree&a->degree)))&&
((!p->not_valence)||(!(p->not_valence&a->valence)))))))
{
if(p->name[0])
if(strcmp(p->name,a->name))
return 0;
if(p->residue[0])
if(strcmp(p->residue,a->residue))
return 0;
if(p->symbol[0])
if(strcmp(p->symbol,a->symbol))
return 0;
if(p->hydro_flag) {
if(p->tot_hydro>a->tot_hydro) { /* must have at least as many hydrogens as pattern... */
return 0;
}
}
return 1;
}
/* what about implicit hydrogens? */
return 0;
}
int ChampBondMatch(ListBond *p,ListBond *a)
{
return((((!p->order)||(p->order&a->order))&&
((!p->class)||(p->class&a->class))&&
((!p->cycle)||(p->cycle&a->cycle))&&
((!p->not_order)||(!(p->not_order&a->order)))&&
((!p->not_class)||(!(p->not_class&a->class)))&&
((!p->not_cycle)||(!(p->not_cycle&a->cycle)))));
}