in contrib/champ/champ.c [4742:5131]
/*
 * ChampSmiToPat -- parse a SMILES-like pattern string into a new pattern.
 *
 * I : champ state owning the Atom, Bond, Int and Pat lists used below.
 * c : NUL-terminated pattern string; it is only read.
 *
 * Atoms and bonds are allocated from I->Atom / I->Bond and chained through
 * their `link` fields in the order they are completed.  On success a new
 * I->Pat element referencing both chains is created, pushed onto
 * I->ActivePatList and reindexed with ChampPatReindex().
 *
 * Returns the index of the new pattern in I->Pat, or 0 when the string is
 * empty, yields no atoms, or fails to parse.
 *
 * NOTE(review): when parsing fails, atoms/bonds already linked into
 * atom_list/bond_list are not released by this function -- confirm whether
 * they should be reclaimed here with a chain-freeing helper.
 */
int ChampSmiToPat(CChamp *I,char *c)
{
  int mark[MAX_RING];     /* atom that opened each ring mark (0 = unused) */
  int mark_pri[MAX_RING]; /* lexical priority at which the mark was opened */
  int stack = 0;          /* parenthetical scopes (list of base atoms) */
  int base_atom = 0;      /* atom the next bond will be attached to */
  int last_atom = 0;
  int last_bond = 0;
  int atom_list = 0;      /* head of the completed atom chain */
  int bond_list = 0;      /* head of the completed bond chain */
  int bond_flag = false;  /* an explicit bond symbol has been seen */
  int cur_atom = 0;       /* scratch atom being filled in */
  int cur_bond = 0;       /* scratch bond being filled in */
  int mark_code = 0;
  int result = 0;
  int sym;
  int ok = true;
  unsigned int bond_tags = 0;
  unsigned int bond_not_tags = 0;
  int a;
  int not_bond = false;   /* a '!' is in effect for following bond symbols */
  int lex_pri = 0;        /* incremented once per parsed token */
  char *orig_c=c;

  /* append the scratch bond to the bond chain and allocate a fresh one */
#define save_bond() do { if(last_bond) {I->Bond[last_bond].link=cur_bond;}\
          else {bond_list=cur_bond;}\
          last_bond = cur_bond;\
          cur_bond = ListElemNewZero(&I->Bond);} while(0)

  /* append the scratch atom to the atom chain and allocate a fresh one */
#define save_atom() do { if(last_atom) {I->Atom[last_atom].link=cur_atom;}\
          else {atom_list=cur_atom;}\
          last_atom = cur_atom;\
          cur_atom = ListElemNewZero(&I->Atom);} while(0)

  PRINTFD(FB_smiles_parsing)
    " ChampSmiToPat: input '%s'\n",c
    ENDFD;

  for(a=0;a<MAX_RING;a++)
    mark[a]=0;
  cur_atom = ListElemNewZero(&I->Atom);
  cur_bond = ListElemNewZero(&I->Bond);

  while((*c)&&ok) {
    lex_pri++;
    PRINTFD(FB_smiles_parsing)
      " parsing: '%c' at %p\n",*c,c
      ENDFD;
    sym = cSym_Null;
    /* ============ ROOT LEVEL PARSING ============ */
    if(((*c)>='0')&&((*c)<='9')) {
      /* single-digit ring mark */
      sym = cSym_Mark;
      mark_code = (*c)-'0';
      c++;
    } else {
      switch(*c) {
        /* standard, implicit atoms, with lowest normal valences
         * B(3), C(4), N(3,5), O(2), P(3,5), S(2,4,6), F(1), Cl(1), Br(1), I(1) */
      case 'C':
        switch(*(c+1)) {
        case 'l':
        case 'L': /* be tolerant at the root level, but not within blocks... */
          /* last argument matches the other standard atoms (F, Br, I, ...) */
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Cl,2,true);
          sym = cSym_Atom;
          break;
        default:
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_C,1,true);
          sym = cSym_Atom;
          PRINTFD(FB_smiles_parsing)
            " parsed: %p\n",c
            ENDFD;
          break;
        }
        break;
      case '<': /* tag index/list */
        if(bond_flag) {
          /* tags following a bond symbol belong to the pending bond */
          c = ChampParseTag(I,c,&bond_tags,&bond_not_tags,&ok);
        } else {
          if(base_atom) {
            c = ChampParseTag(I,c,&I->Atom[base_atom].tag,
                              &I->Atom[base_atom].not_tag,&ok);
          } else ok=false;
        }
        sym = cSym_Qualifier;
        break;
      case '*': /* nonstandard? */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Any,1,false);
        sym = cSym_Atom;
        break;
      case '?': /* nonstandard */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_NotH,1,false);
        sym = cSym_Atom;
        break;
      case 'H': /* nonstandard */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_H,1,false);
        sym = cSym_Atom;
        break;
      case 'N':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_N,1,true);
        sym = cSym_Atom;
        break;
      case 'O':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_O,1,true);
        sym = cSym_Atom;
        break;
      case 'B':
        switch(*(c+1)) {
        case 'r':
        case 'R':
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Br,2,true);
          sym = cSym_Atom;
          break;
        default:
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_B,1,true);
          sym = cSym_Atom;
          break;
        }
        break;
      case 'P':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_P,1,true);
        sym = cSym_Atom;
        break;
      case 'S':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_S,1,true);
        sym = cSym_Atom;
        break;
      case 'F':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_F,1,true);
        sym = cSym_Atom;
        break;
      case 'I':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_I,1,true);
        sym = cSym_Atom;
        break;
        /* standard implicit aromatic atoms */
      case 'c':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_C,1,true);
        sym = cSym_Atom;
        break;
      case 'n':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_N,1,true);
        sym = cSym_Atom;
        break;
      case 'o':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_O,1,true);
        sym = cSym_Atom;
        break;
      case 's':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_S,1,true);
        sym = cSym_Atom;
        break;
      case ';':
        c++;
        not_bond=false;
        sym = cSym_Qualifier;
        break;
      case ',':
        c++;
        sym = cSym_Qualifier;
        break;
      case '!':
        c++;
        not_bond=true;
        sym = cSym_Qualifier;
        break;
      case '-':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Single;
        else
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        break;
      case '/':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Single;
        else
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        I->Bond[cur_bond].direction = cH_Up;
        break;
      case '\\':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Single;
        else
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        I->Bond[cur_bond].direction = cH_Down;
        break;
      case '=':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Double;
        else
          I->Bond[cur_bond].order |= cH_Double;
        sym = cSym_Bond;
        break;
      case '#':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Triple;
        else
          I->Bond[cur_bond].order |= cH_Triple;
        sym = cSym_Bond;
        break;
      case '~':
        c++;
        if(not_bond) {
          I->Bond[cur_bond].not_order |= cH_AnyOrder;
          I->Bond[cur_bond].not_class |= cH_AnyClass;
        } else {
          I->Bond[cur_bond].order |= cH_AnyOrder;
          I->Bond[cur_bond].class |= cH_AnyClass;
        }
        sym = cSym_Bond;
        break;
      case '@':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_cycle |= cH_Cyclic;
        else
          I->Bond[cur_bond].cycle |= cH_Cyclic;
        sym = cSym_Bond;
        break;
      case ':':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_class |= cH_Aromatic;
        else
          I->Bond[cur_bond].class |= cH_Aromatic;
        sym = cSym_Bond;
        break;
      case '.': /* separator */
        c++;
        sym = cSym_Separator;
        break;
      case '%': /* two-digit ring mark: '%' followed by exactly two digits */
        /* c[2] is only inspected once c[1] is known to be a digit, so the
         * terminating NUL is never stepped over */
        if((c[1]>='0')&&(c[1]<='9')&&(c[2]>='0')&&(c[2]<='9')) {
          mark_code = 10*(c[1]-'0') + (c[2]-'0');
          c += 3;
          sym = cSym_Mark;
        } /* else: sym stays cSym_Null and the error is reported at '%' */
        break;
      case '(':
        c++;
        sym = cSym_OpenScope;
        break;
      case ')':
        c++;
        sym = cSym_CloseScope;
        break;
      case '[':
        c++;
        sym = cSym_OpenBlock;
        break;
      case ']':
        c++;
        sym = cSym_CloseBlock;
        break;
      default: /* unrecognized character: reported below */
        break;
      }
    }
    if(sym==cSym_Null) {
      PRINTFB(FB_smiles_parsing,FB_errors)
        " champ: error parsing smiles string at '%c' (char %td) in\n champ: '%s'\n",*c,c-orig_c,orig_c
        ENDFB;
      ok=false;
    }
    if(ok) {
      /* =========== actions based on root level parsing ========== */
      switch(sym) {
      case cSym_OpenBlock:
        ok = ChampParseAtomBlock(I,&c,cur_atom);
        /* stop here on failure: the atom handling below reassigns `ok`
         * and would otherwise mask the block parse error */
        if(!ok)
          break;
        /* fallthrough: a parsed block is handled like any other atom */
      case cSym_Atom:
        /* was there a preceding atom? if so, then form bond and save atom */
        if(base_atom) {
          PRINTFD(FB_smiles_parsing)
            " ChampSmiToPtr: saving atom %d\n",last_atom
            ENDFD;
          /* backward link */
          I->Bond[cur_bond].atom[0] = base_atom;
          I->Bond[cur_bond].atom[1] = cur_atom;
          I->Bond[cur_bond].pri[0] = lex_pri;
          I->Bond[cur_bond].pri[1] = lex_pri;
          if(!bond_flag) {
            /* no explicit bond symbol: choose a default order */
            if((I->Atom[cur_atom].class&cH_Aromatic)&&
               (I->Atom[base_atom].class&cH_Aromatic))
              I->Bond[cur_bond].order = (cH_Single|cH_Aromatic); /* is this right? */
            else
              I->Bond[cur_bond].order = cH_Single;
          }
          I->Bond[cur_bond].tag = bond_tags; /* save bond tags */
          I->Bond[cur_bond].not_tag = bond_not_tags; /* save bond tags */
          bond_tags=0;
          bond_not_tags=0;
          ok = ChampAddBondToAtom(I,cur_atom,cur_bond);
          if(ok) {
            ok = ChampAddBondToAtom(I,base_atom,cur_bond);
            save_bond();
          }
          bond_flag=false;
          not_bond=false;
        }
        base_atom = cur_atom;
        save_atom();
        break;
      case cSym_CloseBlock: /* should never be reached */
        break;
      case cSym_OpenScope: /* push base_atom onto stack */
        stack = ListElemPushInt(&I->Int,stack,base_atom);
        break;
      case cSym_CloseScope:
        if(!stack) {
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ: stack underflow for scope...\n"
            ENDFB;
          ok=false;
        } else {
          /* return to the atom that was current when the scope opened */
          base_atom=I->Int[stack].value;
          stack = ListElemPop(I->Int,stack);
        }
        break;
      case cSym_Bond:
        bond_flag=true;
        break;
      case cSym_Mark:
        if((mark_code<0)||(mark_code>=MAX_RING)) {
          /* would index outside mark[] / mark_pri[] */
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ: ring mark %d out of range...\n",mark_code
            ENDFB;
          ok = false;
        } else if(base_atom) {
          if(!mark[mark_code]) { /* opening cycle */
            mark[mark_code] = base_atom;
            mark_pri[mark_code] = lex_pri;
            bond_flag = false; /* ignore the first bond valence...we'll get it from the second half of the mark */
            not_bond = false;
          } else { /* closing cycle */
            I->Bond[cur_bond].atom[0] = base_atom;
            I->Bond[cur_bond].atom[1] = mark[mark_code];
            I->Bond[cur_bond].pri[0] = lex_pri;
            I->Bond[cur_bond].pri[1] = mark_pri[mark_code];
            if(!bond_flag) {
              I->Bond[cur_bond].order = cH_Single;
            }
            ok = ChampAddBondToAtom(I,base_atom,cur_bond);
            if(ok) {
              ok = ChampAddBondToAtom(I,mark[mark_code],cur_bond);
              save_bond();
            }
            mark[mark_code]=0; /* the mark may be reused afterwards */
            bond_flag=false;
            not_bond=false;
          }
        } else {
          /* a ring mark with no atom to attach it to */
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ: syntax error...\n"
            ENDFB;
          ok = false;
        }
        break;
      case cSym_Separator:
        /* the next atom starts a new, unbonded fragment */
        base_atom = 0;
        break;
      case cSym_Qualifier:
        break;
      default:
        break;
      }
    }
  }

  /* release scope entries left over from unbalanced '(' or an early exit */
  while(stack)
    stack = ListElemPop(I->Int,stack);

  if(ok&&atom_list) {
    result = ListElemNewZero(&I->Pat);
    if(result) {
      I->ActivePatList = ListElemPushInt(&I->Int,I->ActivePatList,result);
      I->Pat[result].atom = atom_list;
      I->Pat[result].bond = bond_list;
    } else
      ok=false;
  }
  /* the scratch atom/bond were never linked into the chains */
  if(cur_atom) ChampAtomFree(I,cur_atom);
  if(cur_bond) ChampBondFree(I,cur_bond);
  if(result) ChampPatReindex(I,result);

  PRINTFD(FB_smiles_parsing)
    " ChampSmiToPtr: returning pattern %d atom_list %d bond_list %d\n",result,atom_list,bond_list
    ENDFD;

  return(result);
}