def ss()

in modules/pymol/util.py [0:0]
398 lines of code
98 McCabe index (conditional complexity)

def ss(selection="(name CA and alt '',A)",state=1,_self=cmd):
    '''
    Legacy secondary structure assignment routine. Don't use.

    Use instead: dss
    '''
    pymol=_self._pymol
    cmd=_self # NOT THREAD SAFE

    print(' util.ss: WARNING: This is not a "correct" secondary structure')
    print(' util.ss: assignment algorithm!  Please use only as a last resort.')
    
    cmd.feedback("push")
    cmd.feedback("disable","executive","actions")
    
    ss_pref = "_sss"
    sss1 = ss_pref+"1"
    cnt = cmd.select(sss1,"((byres ("+selection+")) and name CA and not het)")
    print(" util.ss: initiating secondary structure assignment on %d residues."%cnt)
    cas = cmd.index(sss1)
    if not len(cas):
        return
    # set cartoon mode to auto over the selection
    
    cmd.cartoon("auto",sss1)

    print(" util.ss: extracting sequence and relationships...")

    # get CA list
    
    res_list = []
    pymol._ss = pymol.Scratch_Storage()
    pymol._ss.res_list = res_list
    cmd.iterate(sss1,'_ss.res_list.append((model,index))')

    # generate atom-to-residue conversion dictionaries

    ca_dict = {}
    n_dict = {}
    o_dict = {}
    scr_dict = {} # scr = segment,chain,resi 
    pymol._ss.n_dict = n_dict
    pymol._ss.o_dict = o_dict
    pymol._ss.scr_dict = scr_dict
    pymol._ss.ca_dict = ca_dict
    cmd.iterate(sss1,
                    '_ss.scr_dict[(model,index)]=(segi,chain,resi)') # CA's
    cmd.iterate("((byres "+sss1+") and n;N)"
                    ,'_ss.scr_dict[(model,index)]=(segi,chain,resi)') # N's
    cmd.iterate("((byres "+sss1+") and n;O)",
                    '_ss.scr_dict[(model,index)]=(segi,chain,resi)') # O's
    cmd.iterate(sss1,
                    '_ss.ca_dict[(segi,chain,resi)] = (model,index)')
    cmd.iterate("((byres "+sss1+") and n;N)",
                    '_ss.n_dict[(segi,chain,resi)] = (model,index)')
    cmd.iterate("((byres "+sss1+") and n;O)",
                    '_ss.o_dict[(segi,chain,resi)] = (model,index)')

    scr_dict[None]=None
    o_dict[None]=None
    n_dict[None]=None
    ca_dict[None]=None
    
    # create special version of cas with gaps

    gap = [None,None,None,None]  
    # gap large enough to distinguish i+4 interations from gaps
    last = None
    for a in res_list:
        if last!=None:
            if(cmd.count_atoms(
                "((neighbor(neighbor(neighbor (%s`%d)))) and (%s`%d))"%
                (last[0],last[1],a[0],a[1]),quiet=1)==0):
                gap.extend([None,None,None,None])
        gap.append(a)
        last = a
    gap.extend([None,None,None,None])

    print(" util.ss: analyzing phi/psi angles (slow)...")

    # generate reverse-lookup for gap indices

    ss = {}

    c = 0
    gap_idx = {}
    for a in gap:
        gap_idx[a] = c
        c = c + 1

    # secondary structure database...
    
    ss = {}
    ss[None]=None
    
    # make decisions based on phi/psi

    for a in cas:
        ss[a] = 'L' # default
    phipsi = cmd.get_phipsi(sss1,state)
    for a in phipsi.keys():
        (phi,psi) = phipsi[a]
#      print scr_dict[a],(phi,psi)
        if (phi!=None) and (psi!=None):
            if ((phi<-45) and (phi>-160) and
                 (psi<-170) or (psi>10)): # beta?
                ss[a] = 's'
            elif ((phi<-45) and (phi>-160) and
                    (psi>-80) and (psi<-25)): # helix?
                ss[a] = 'H'
                
    print(" util.ss: finding hydrogen bonds...")
    
    # find all pairwise hydrogen bonds and make note of them in dict

    hb = cmd.find_pairs("((byres "+sss1+") and n;N)",
                              "((byres "+sss1+") and n;O)",mode=1,
                              cutoff=3.7,angle=55,
                              state1=state,state2=state)
    
    hb_dict = {}  # [((N-atom) (O-atom))] = 1
    n_hb_dict = {} # [(N-atom)] = [(O-atom),...]
    o_hb_dict = {} # [(O-atom)] = [(N-atom),...]
    for a in hb:
#      cmd.dist("(%s`%d)"%a[0],"(%s`%d)"%a[1])
        hb_dict[a] = 1
        n = a[0]
        o = a[1]
        if n not in n_hb_dict: n_hb_dict[n]=[]
        if o not in o_hb_dict: o_hb_dict[o]=[]
        n_hb_dict[n].append(o)
        o_hb_dict[o].append(n)

    # check to insure that all helical residues have at least an i +/- 4
    # hydrogen bond

    for c in range(4,len(gap)-4):
        a = gap[c]
        if ss[a]=='H':
            aN = n_dict[scr_dict[a]]
            aO = o_dict[scr_dict[a]]
            am4O = o_dict[scr_dict[gap[c-4]]]
            ap4N = n_dict[scr_dict[gap[c+4]]]
            if (aN,am4O) not in hb_dict:
                if (ap4N,aO) not in hb_dict:
                    ss[a]='L'

    print(" util.ss: verifying beta sheets...")
    
    # check to insure that all beta residues have proper interactions

    rep_dict = {}
    repeat = 1
    while repeat:
        repeat = 0
        c = 4
        cc = len(gap)-4
        while c<cc:
            a1 = gap[c]
            if (ss[a1] in ['s','S']) and a1 not in rep_dict:
                rep_dict[a1] = 1
                valid = 0
                scr_a1 = scr_dict[a1]
                # look for antiparallel 2:2 H-bonds (NH-O=C + C=O-HN) 
                n_a1_atom = n_dict[scr_a1]
                o_a1_atom = o_dict[scr_a1]
                if (n_a1_atom in n_hb_dict and 
                     o_a1_atom in o_hb_dict):
                    for n_hb_atom in n_hb_dict[n_a1_atom]:
                        for o_hb_atom in o_hb_dict[o_a1_atom]:
                            n_hb_scr = scr_dict[n_hb_atom]
                            o_hb_scr = scr_dict[o_hb_atom]
                            if o_hb_scr == n_hb_scr:
                                b1 = ca_dict[o_hb_scr]
                                if abs(c-gap_idx[b1])>2:
                                    ss[b1] = 'S' 
                                    ss[a1] = 'S' 
                                    valid = 1
                # look for antiparallel offset HB (i,i+2,j,j-2)
                a3 = gap[c+2]
                if (a3!=None):
                    scr_a3 = scr_dict[a3]
                    o_a1_atom = o_dict[scr_a1]
                    n_a3_atom = n_dict[scr_a3]
                    if (n_a3_atom in n_hb_dict and
                         o_a1_atom in o_hb_dict):               
                        for n_hb_atom in n_hb_dict[n_a3_atom]:
                            for o_hb_atom in o_hb_dict[o_a1_atom]:
                                n_hb_scr = scr_dict[n_hb_atom]
                                o_hb_scr = scr_dict[o_hb_atom]
                                b1 = ca_dict[o_hb_scr]
                                if b1!=None:
                                    b1_i = gap_idx[b1]
                                    if abs(c-b1_i)>2: # no turns!
                                        b3 = gap[b1_i-2]
                                        if b3!=None:
                                            b3_scr = scr_dict[b3]
                                            if b3_scr == n_hb_scr:
                                                a2 = gap[c+1]
                                                b2 = gap[gap_idx[b1]-1]
                                                ss[b1] = 'S'
                                                ss[b3] = 'S'
                                                ss[a1] = 'S'
                                                ss[a3] = 'S'
                                                if ss[a2]=='L': ss[a2] = 's'
                                                if ss[b2]=='L': ss[b2] = 's'
                                                valid = 1
                # look for antiparallel offset HB (i,i-2,j,j+2)
                a3 = gap[c-2]
                if (a3!=None):
                    scr_a3 = scr_dict[a3]
                    n_a1_atom = n_dict[scr_a1]
                    o_a3_atom = o_dict[scr_a3]
                    if (n_a1_atom in n_hb_dict and
                         o_a3_atom in o_hb_dict):               
                        for n_hb_atom in n_hb_dict[n_a1_atom]:
                            for o_hb_atom in o_hb_dict[o_a3_atom]:
                                n_hb_scr = scr_dict[n_hb_atom]
                                o_hb_scr = scr_dict[o_hb_atom]
                                b1 = ca_dict[o_hb_scr]
                                if b1!=None:
                                    b1_i = gap_idx[b1]
                                    if abs(c-b1_i)>2: # no turns!
                                        b3 = gap[b1_i-2]
                                        if b3!=None:
                                            b3_scr = scr_dict[b3]
                                            if b3_scr == n_hb_scr:
                                                a2 = gap[c-1]
                                                b2 = gap[gap_idx[b1]-1]
                                                ss[b1] = 'S'
                                                ss[b3] = 'S'
                                                ss[a1] = 'S'
                                                ss[a3] = 'S'
                                                if ss[a2]=='L': ss[a2] = 's'
                                                if ss[b2]=='L': ss[b2] = 's'
                                                valid = 1
                # look for parallel 1:3 HB (i,j-1,j+1)
                n_a1_atom = n_dict[scr_a1]
                o_a1_atom = o_dict[scr_a1]
                if (n_a1_atom in n_hb_dict and
                     o_a1_atom in o_hb_dict):
                    for n_hb_atom in n_hb_dict[n_a1_atom]:
                        for o_hb_atom in o_hb_dict[o_a1_atom]:
                            n_hb_scr = scr_dict[n_hb_atom]
                            o_hb_scr = scr_dict[o_hb_atom]
                            b0 = ca_dict[n_hb_scr]
                            if b0!=None:
                                b2 = gap[gap_idx[b0]+2]
                                if b2!=None:
                                    b2_scr = scr_dict[b2]
                                    if b2_scr == o_hb_scr:
                                        b1 = gap[gap_idx[b0]+1]
                                        ss[a1] = 'S' 
                                        ss[b0] = 'S'
                                        if ss[b1]=='L': ss[b1]='s'
                                        ss[b2] = 'S'
                                        valid = 1
                                        repeat = 1
                if not valid:
                    ss[a1] = 'L'
            c = c + 1

    # automatically fill 1 residue gaps in helices and well-defined sheets
    c = 4
    cc = len(gap)-6
    while c<cc:
        a1 = gap[c]
        a3 = gap[c+2]
        ss_a1 = ss[a1]
        ss_a3 = ss[a3]
        if (ss_a1==ss_a3) and (ss_a1 in ['S','H']):
            a2 = gap[c+1]
            ss[a2] = ss_a1
        c = c + 1

    # remove singleton sheet residues
    c = 4
    cc = len(gap)-4
    while c<cc:
        a0 = gap[c-1]
        a1 = gap[c]
        a2 = gap[c+1]
        if ss[a1] in ['s','S']:
            if ((not ss[a0] in ['s','S']) and
                 (not ss[a2] in ['s','S'])):
                 ss[a1] = 'L'
        c = c + 1

    # remove sheet residues which aren't next to another sheet 
    c = 4
    cc = len(gap)-4
    while c<cc:
        a1 = gap[c]
        if ss[a1]=='S':
            a1 = gap[c]
            scr_a1 = scr_dict[a1]
            # look for hydrogen bonds to another sheet
            n_a1_atom = n_dict[scr_a1]
            o_a1_atom = o_dict[scr_a1]
            certain = 0
            if n_a1_atom in n_hb_dict:
                for n_hb_atom in n_hb_dict[n_a1_atom]:
                    n_hb_ca_atom=ca_dict[scr_dict[n_hb_atom]]
                    if ss[n_hb_ca_atom]=='S':
                        certain = 1
                        break
            if o_a1_atom in o_hb_dict:
                for o_hb_atom in o_hb_dict[o_a1_atom]:
                    o_hb_ca_atom=ca_dict[scr_dict[o_hb_atom]]
                    if ss[o_hb_ca_atom]=='S':
                        certain = 1
                        break
            if not certain:
                ss[a1] = 's'
        c = c + 1

    # remove questionable sheet residues
    c = 4
    cc = len(gap)-4
    while c<cc:
        a0 = gap[c-1]
        a1 = gap[c]
        a2 = gap[c+1]
        if ss[a1]=='s':
            if (not ((ss[a0]=='S') and (ss[a2]=='S'))):
                ss[a1] = 'L'
        c = c + 1

    # extend helices if hydrogen bonding requirements are met
    rep_dict = {}
    repeat = 1
    while repeat:
        repeat = 0
        c = 4
        cc = len(gap)-4
        while c<cc:
            a = gap[c]
            if a not in rep_dict:
                if ss[gap[c+1]]=='H':
                    rep_dict[a] = 1
                    if ss[a]!='H': # N-terminal end
                        aO = o_dict[scr_dict[a]]
                        ap4N = n_dict[scr_dict[gap[c+4]]]
                        ap3N = n_dict[scr_dict[gap[c+3]]]
                        if (ap4N,aO) in hb_dict or (ap3N,aO) in hb_dict:
                            ss[a]='H'
                            repeat = 1
                            c = c - 5
                            if c<4: c=4
                if ss[gap[c-1]]=='H':
                    a = gap[c]
                    if ss[a]!='H': # C-terminal end
                        rep_dict[a] = 1
                        aN = n_dict[scr_dict[a]]
                        am4O = o_dict[scr_dict[gap[c-4]]]
                        am3O = o_dict[scr_dict[gap[c-3]]]
                        if (aN,am4O) in hb_dict or (aN,am3O) in hb_dict:
                            ss[a]='H'
                            repeat = 1
                            c = c - 5
                            if c<4: c=4
            c = c + 1

    # remove doubleton helices

    c = 4
    cc = len(gap)-5
    while c<cc:
        a0 = gap[c-1]
        a1 = gap[c]
        a2 = gap[c+1]
        a3 = gap[c+2]
        ss_a0 = ss[gap[c-1]]
        ss_a1 = ss[gap[c]]
        ss_a2 = ss[gap[c+1]]
        ss_a3 = ss[gap[c+2]]
        if ss_a1=='H':
            if (ss_a2==ss_a1) and (ss_a0!=ss_a2) and (ss_a2!=ss_a3):
                ss[a1] = 'L'
                ss[a2] = 'L'
        c = c + 1

    # remove totally unreasonable helix and sheet residues

    c = 4
    cc = len(gap)-5
    while c<cc:
        a1 = gap[c]
        ss_a1 = ss[gap[c]]
        if ss_a1=='H':
            if a1 in phipsi:
                (phi,psi) = phipsi[a1]
                if (phi>0) and (phi<150):
                    ss[a1] = 'L'
                elif((psi<-120) or (psi>140)):
                    ss[a1] = 'L'
        elif ss_a1 in ['S','s']:
            if a1 in phipsi:
                (phi,psi) = phipsi[a1]
                if (phi>45) and (phi<160):
                    ss[a1] = 'L'
#            if (psi<-30) and (psi>-150):
                if (psi<-65) and (psi>-150):
                    ss[a1] = 'L'
            
        c = c + 1


    for x in range(1,3):
        # remove singleton sheet residues
        c = 4
        cc = len(gap)-4
        while c<cc:
            a0 = gap[c-1]
            a1 = gap[c]
            a2 = gap[c+1]
            if ss[a1] in ['s','S']:
                if ((not ss[a0] in ['s','S']) and
                     (not ss[a2] in ['s','S'])):
                     ss[a1] = 'L'
            c = c + 1

        # remove sheet residues which aren't next to another sheet 
        c = 4
        cc = len(gap)-4
        while c<cc:
            a1 = gap[c]
            if ss[a1]=='S':
                a1 = gap[c]
                scr_a1 = scr_dict[a1]
                # look for hydrogen bonds to another sheet
                n_a1_atom = n_dict[scr_a1]
                o_a1_atom = o_dict[scr_a1]
                certain = 0
                if n_a1_atom in n_hb_dict:
                    for n_hb_atom in n_hb_dict[n_a1_atom]:
                        n_hb_ca_atom=ca_dict[scr_dict[n_hb_atom]]
                        if ss[n_hb_ca_atom]=='S':
                            certain = 1
                            break
                if o_a1_atom in o_hb_dict:
                    for o_hb_atom in o_hb_dict[o_a1_atom]:
                        o_hb_ca_atom=ca_dict[scr_dict[o_hb_atom]]
                        if ss[o_hb_ca_atom]=='S':
                            certain = 1
                            break
                if not certain:
                    ss[a1] = 's'
            c = c + 1

        # remove questionable sheet residues
        c = 4
        cc = len(gap)-4
        while c<cc:
            a0 = gap[c-1]
            a1 = gap[c]
            a2 = gap[c+1]
            if ss[a1]=='s':
                if (not ((ss[a0]=='S') and (ss[a2]=='S'))):
                    ss[a1] = 'L'
            c = c + 1

#      lst = ss.keys()
#      lst.sort()
#      for a in lst: print scr_dict[a],ss[a]
        
    # assign protein
    for a in cas:
        if ss[a]=='s':
            ss[a]='S'
        
    cmd.alter(sss1,"ss ='L'")
    for a in cas:
        if ss[a]!='L':
            cmd.alter("(%s`%d)"%a,"ss='%s'"%ss[a])

    cmd.feedback("pop")

    del pymol._ss # IMPORTANT
    cmd.delete(sss1)
    cmd.rebuild(selection,'cartoon')
    #
#   print conn_hash.keys()
    print(" util.ss: assignment complete.")