int ChampSmiToPat()

in contrib/champ/champ.c [4742:5131]


/*
 * ChampSmiToPat: parse the SMILES-like string `c` into a new pattern.
 *
 * Atoms and bonds are allocated from I->Atom / I->Bond with
 * ListElemNewZero and chained together through their `link` fields.
 * When parsing succeeds and at least one atom was saved, a new I->Pat
 * record is created that points at the heads of both chains, the record
 * is pushed onto I->ActivePatList, and ChampPatReindex is called on it.
 *
 * Returns the index of the new pattern in I->Pat, or 0 when parsing
 * failed, no atom was parsed, or the pattern record could not be
 * allocated.
 *
 * NOTE(review): when parsing fails part-way, the atoms/bonds already
 * linked into atom_list/bond_list are not released here -- confirm
 * whether a chain-free helper should be called on the error path.
 */
int ChampSmiToPat(CChamp *I,char *c) 
{
  int mark[MAX_RING]; /* atom that opened each ring mark (0 = mark not open) */
  int mark_pri[MAX_RING]; /* lexical priority of mark */
  int stack = 0; /* parenthetical scopes */
  int base_atom = 0;
  int last_atom = 0;
  int last_bond = 0;
  int atom_list = 0;
  int bond_list = 0;
  int bond_flag = false;
  int cur_atom = 0;
  int cur_bond = 0;
  int mark_code = 0;
  int result = 0;
  int sym;
  int ok = true;
  unsigned int bond_tags = 0;
  unsigned int bond_not_tags = 0;
  int a;
  int not_bond = false;
  int lex_pri = 0;
  char *orig_c=c;

/* append cur_bond to the bond chain and allocate a fresh scratch bond */
#define save_bond() { if(last_bond) {I->Bond[last_bond].link=cur_bond;}\
          else {bond_list=cur_bond;}\
          last_bond = cur_bond;\
          cur_bond = ListElemNewZero(&I->Bond);}
  
/* append cur_atom to the atom chain and allocate a fresh scratch atom */
#define save_atom() { if(last_atom) {I->Atom[last_atom].link=cur_atom;}\
          else {atom_list=cur_atom;}\
          last_atom = cur_atom;\
          cur_atom = ListElemNewZero(&I->Atom);}

  PRINTFD(FB_smiles_parsing) 
    " ChampSmiToPat: input '%s'\n",c
    ENDFD;
  
  for(a=0;a<MAX_RING;a++)
    mark[a]=0;
  /* cur_atom / cur_bond are scratch records that get filled in while
   * parsing and are only linked into the chains by save_atom/save_bond */
  cur_atom = ListElemNewZero(&I->Atom);
  cur_bond = ListElemNewZero(&I->Bond);
  
  lex_pri = 0;
  while((*c)&&ok) {
    lex_pri++;
    PRINTFD(FB_smiles_parsing) 
      " parsing: '%c' at %p\n",*c,c
      ENDFD;
    sym = cSym_Null;
    /* ============ ROOT LEVEL PARSING ============ */
    if(((*c)>='0')&&((*c)<='9')) {
      sym = cSym_Mark;
      mark_code = (*c)-'0';
      c++;
    } else {
      switch(*c) {
      /* standard, implicit atoms, with lowest normal valences
       * B(3), C(4), N(3,5), O(2), P(3,5), S(2,4,6), F(1), Cl(1), Br(1), I(1) */
      case 'C':
        switch(*(c+1)) {
        case 'l':
        case 'L': /* be tolerant at the root level, but not within blocks...*/
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Cl,2,false);
          sym = cSym_Atom;
          break;
        default:
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_C,1,true);
          sym = cSym_Atom;
          PRINTFD(FB_smiles_parsing) 
            " parsed: %p\n",c
            ENDFD;
          break;
        }
        break;
      case '<': /* tag index/list */
        if(bond_flag) {
          c = ChampParseTag(I,c,&bond_tags,&bond_not_tags,&ok);
        } else {
          if(base_atom) {
            c = ChampParseTag(I,c,&I->Atom[base_atom].tag,
                              &I->Atom[base_atom].not_tag,&ok);
          } else ok=false;
        }
        sym = cSym_Qualifier;
        break;
      case '*': /* nonstandard? */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Any,1,false);
        sym = cSym_Atom;
        break;
      case '?': /* nonstandard */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_NotH,1,false);
        sym = cSym_Atom;
        break;
      case 'H': /* nonstandard */
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_H,1,false);
        sym = cSym_Atom;
        break;
      case 'N':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_N,1,true);
        sym = cSym_Atom;
        break;      
      case 'O':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_O,1,true);
        sym = cSym_Atom;
        break;      
      case 'B':
        switch(*(c+1)) {
        case 'r':
        case 'R':
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_Br,2,true);
          sym = cSym_Atom;
          break;
        default:
          c = ChampParseAliphaticAtom(I,c,cur_atom,cH_B,1,true);
          sym = cSym_Atom;
          break;
        }
        break;
      case 'P':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_P,1,true);
        sym = cSym_Atom;
        break;      
      case 'S':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_S,1,true);
        sym = cSym_Atom;
        break;      
      case 'F':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_F,1,true);
        sym = cSym_Atom;
        break;      
      case 'I':
        c = ChampParseAliphaticAtom(I,c,cur_atom,cH_I,1,true);
        sym = cSym_Atom;
        break;      
        /* standard implicit aromatic atoms */
      case 'c':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_C,1,true);
        sym = cSym_Atom;
        break;
      case 'n':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_N,1,true);
        sym = cSym_Atom;
        break;
      case 'o':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_O,1,true);
        sym = cSym_Atom;
        break;
      case 's':
        c = ChampParseAromaticAtom(I,c,cur_atom,cH_S,1,true);
        sym = cSym_Atom;
        break;
      case ';':
        c++;
        not_bond=false;
        sym = cSym_Qualifier;
        break;
      case ',':
        c++;
        sym = cSym_Qualifier;
        break;
      case '!': /* negate the bond primitive(s) that follow */
        c++;
        not_bond=true;
        sym = cSym_Qualifier;
        break;
      case '-':
        c++;
        if(not_bond) 
          I->Bond[cur_bond].not_order |= cH_Single;
        else 
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        break;
      case '/':
        c++;
        if(not_bond) 
          I->Bond[cur_bond].not_order |= cH_Single;
        else 
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        I->Bond[cur_bond].direction = cH_Up;
        break;
      case '\\':
        c++;
        if(not_bond) 
          I->Bond[cur_bond].not_order |= cH_Single;
        else 
          I->Bond[cur_bond].order |= cH_Single;
        sym = cSym_Bond;
        I->Bond[cur_bond].direction = cH_Down;
        break;
      case '=':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Double;
        else
          I->Bond[cur_bond].order |= cH_Double;
        sym = cSym_Bond;
        break;
      case '#':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_order |= cH_Triple;
        else
          I->Bond[cur_bond].order |= cH_Triple;
        sym = cSym_Bond;
        break;
      case '~':
        c++;
        if(not_bond) {
          I->Bond[cur_bond].not_order |= cH_AnyOrder;
          I->Bond[cur_bond].not_class |= cH_AnyClass;
        } else {
          I->Bond[cur_bond].order |= cH_AnyOrder;
          I->Bond[cur_bond].class |= cH_AnyClass;
        }
        sym = cSym_Bond;
        break;
      case '@':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_cycle |= cH_Cyclic;
        else
          I->Bond[cur_bond].cycle |= cH_Cyclic;
        sym = cSym_Bond;
        break;
      case ':':
        c++;
        if(not_bond)
          I->Bond[cur_bond].not_class |= cH_Aromatic;
        else
          I->Bond[cur_bond].class |= cH_Aromatic;
        sym = cSym_Bond;
        break;
      case '.': /* separator */
        c++;
        sym = cSym_Separator;
        break;
      case '%': /* two-digit ring mark: "%nn" */
        /* Both following characters must be digits.  The short-circuit
         * keeps us from reading c[2] when c[1] is the terminator, so we
         * never step past the end of the string. */
        if((c[1]>='0')&&(c[1]<='9')&&
           (c[2]>='0')&&(c[2]<='9')) {
          mark_code = 10*(c[1]-'0') + (c[2]-'0');
          c += 3;
          sym = cSym_Mark;
        } /* else: sym stays cSym_Null and the parse error below is
             reported at the '%' */
        break;
      case '(':
        c++;
        sym = cSym_OpenScope;
        break;
      case ')':
        c++;
        sym = cSym_CloseScope;
        break;
      case '[':
        c++;
        sym = cSym_OpenBlock;
        break;
      case ']':
        c++;
        sym = cSym_CloseBlock;
        break;
      }
    }
    if(sym==cSym_Null) {
      PRINTFB(FB_smiles_parsing,FB_errors)
        " champ: error parsing smiles string at '%c' (char %zd) in\n champ: '%s'\n",*c,c-orig_c,orig_c
        ENDFB;
      ok=false;
    }
    if(ok) {
      /* =========== actions based on root level parsing ========== */
      switch(sym) {
      case cSym_OpenBlock:
        ok = ChampParseAtomBlock(I,&c,cur_atom);
        /* stop here on failure: the bonding code below reassigns `ok`
         * and would otherwise hide the block-parsing error */
        if(!ok)
          break;
        /* fall through: a bracketed atom is then handled like any atom */
      case cSym_Atom:
        /* was there a preceding atom? if so, then form bond and save atom */
        if(base_atom) {
          PRINTFD(FB_smiles_parsing) 
            " ChampSmiToPtr: saving atom %d\n",last_atom
            ENDFD;
          /* backward link */
          I->Bond[cur_bond].atom[0] = base_atom;
          I->Bond[cur_bond].atom[1] = cur_atom;
          I->Bond[cur_bond].pri[0] = lex_pri;
          I->Bond[cur_bond].pri[1] = lex_pri;
          if(!bond_flag) {
            /* no explicit bond symbol was given: choose a default order */
            if((I->Atom[cur_atom].class&cH_Aromatic)&&
               (I->Atom[base_atom].class&cH_Aromatic))
              I->Bond[cur_bond].order = (cH_Single|cH_Aromatic); /* is this right? */
            else
              I->Bond[cur_bond].order = cH_Single;
          } 
          I->Bond[cur_bond].tag = bond_tags; /* save bond tags */
          I->Bond[cur_bond].not_tag = bond_not_tags; /* save bond tags */
          bond_tags=0;
          bond_not_tags=0;
          ok = ChampAddBondToAtom(I,cur_atom,cur_bond);
          if(ok) {
            ok = ChampAddBondToAtom(I,base_atom,cur_bond);
            save_bond();
          }
          bond_flag=false;
          not_bond=false;
        } 
        base_atom = cur_atom;
        save_atom();
        break;
      case cSym_CloseBlock: /* should never be reached */
        break;
      case cSym_OpenScope: /* push base_atom onto stack */
        stack = ListElemPushInt(&I->Int,stack,base_atom);
        break;
      case cSym_CloseScope:
        if(!stack) {
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ: stack underflow for scope...\n"
            ENDFB;
          ok=false;
        } else {
          /* restore the atom that was current when the scope opened */
          base_atom=I->Int[stack].value;
          stack = ListElemPop(I->Int,stack);
        }
        break;
      case cSym_Bond:
        bond_flag=true;
        break;
      case cSym_Mark:
        if((mark_code<0)||(mark_code>=MAX_RING)) {
          /* mark_code indexes mark[]/mark_pri[]; reject anything that
           * would fall outside those arrays */
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ: ring mark %d out of range...\n",mark_code
            ENDFB;
          ok = false;
        } else if(base_atom) {
          if(!mark[mark_code]) { /* opening cycle */
            mark[mark_code] = base_atom;
            mark_pri[mark_code] = lex_pri;
            bond_flag = false; /* ignore the first bond valence...we'll get it from the second half of the mark*/
            not_bond = false;
          } else { /* closing cycle */
            I->Bond[cur_bond].atom[0] = base_atom;
            I->Bond[cur_bond].atom[1] = mark[mark_code];
            I->Bond[cur_bond].pri[0] = lex_pri;
            I->Bond[cur_bond].pri[1] = mark_pri[mark_code];
            if(!bond_flag) {
              I->Bond[cur_bond].order = cH_Single;
            }
            ok = ChampAddBondToAtom(I,base_atom,cur_bond);
            if(ok) {
              ok = ChampAddBondToAtom(I,mark[mark_code],cur_bond);
              save_bond();
            }
            mark[mark_code]=0; /* mark may be reused for another ring */
            bond_flag=false;
            not_bond=false;
          }
        } else {
          PRINTFB(FB_smiles_parsing,FB_errors)
            " champ:  syntax error...\n"
            ENDFB;
          ok = false;
        }
        break;
      case cSym_Separator:
        /* next atom starts a new fragment: no bond to the previous one */
        base_atom = 0;
        break;
      case cSym_Qualifier:
        break;
      }
    }
  }
  /* Release scope entries left behind by unbalanced '(' or by an early
   * error exit; ListElemPop is used as in the cSym_CloseScope case,
   * where it yields the next stack element. */
  while(stack)
    stack = ListElemPop(I->Int,stack);
  if(ok&&atom_list) {
    result = ListElemNewZero(&I->Pat);
    if(result) {
      I->ActivePatList = ListElemPushInt(&I->Int,I->ActivePatList,result);
      I->Pat[result].atom = atom_list;
      I->Pat[result].bond = bond_list;
    } else
      ok=false;
  }
  /* the scratch atom/bond were never linked into the chains */
  if(cur_atom) ChampAtomFree(I,cur_atom);
  if(cur_bond) ChampBondFree(I,cur_bond);
  if(result) ChampPatReindex(I,result);

  PRINTFD(FB_smiles_parsing) 
    " ChampSmiToPtr: returning pattern %d atom_list %d bond_list %d\n",result,atom_list,bond_list
    ENDFD;
  
  return(result);
}