modules/chempy/mae.py (321 lines of code) (raw):
#A* -------------------------------------------------------------------
#B* This file contains source code for the PyMOL computer program
#C* copyright 1998-2000 by Warren Lyford Delano of DeLano Scientific.
#D* -------------------------------------------------------------------
#E* It is unlawful to modify or remove this copyright notice.
#F* -------------------------------------------------------------------
#G* Please see the accompanying LICENSE file for further information.
#H* -------------------------------------------------------------------
#I* Additional authors of this source file include:
#-*
#-*
#-*
#Z* -------------------------------------------------------------------
from chempy.models import Indexed,Connected
from chempy import Storage,Atom,Bond
# ad-hoc maestro file parser
import re
import copy
strip_re = re.compile(r'\#.*\#')
token_re = re.compile(r'"([^"]*)"|([^ ]+)')
array_re = re.compile(r'(.*)\[([0-9]+)\]')
coerce = {
's' : str,
'i' : int,
'r' : float }
class MAEParser:
def __init__(self,lst=None):
self.i = 0 # index in list
self.t = [] # token list
self.d = [] # hiearchy of data read
self.lst = lst
self.lst_len = len(lst)
def nxt_lin(self):
if self.lst:
if self.i<self.lst_len:
self.i = self.i + 1
return self.lst[self.i-1]
return None
def nxt_tok(self):
while 1:
if len(self.t):
return self.t.pop(0)
else:
l = self.nxt_lin()
if not l:
return None
l = strip_re.sub('',l).strip()
self.t = token_re.findall(l)
self.t = [''.join(x) for x in self.t]
return None
def push_tok(self,tok):
self.t.insert(0,tok)
def parse_top(self):
dct = {}
stk = [] # keyword stack
mode = 0 # 0 = definition, 1 = data
while 1:
tok = self.nxt_tok()
if tok==None:
break
if tok==':::':
mode = 1
if not len(stk):
mode = 0
if mode:
lab = stk.pop(0)
dct[lab] = coerce[lab[0]](*(tok,))
else:
stk.append(tok)
if tok=='}':
break
return dct
def parse_array(self,n_rec): # creates a list of homogenous lists
# each containing data for one field
dct = {}
data = [] # actual array data
stk = [] # keyword stack
coer = [] # coersion functions
n_fld = 0
mode = 0 # 0 = definition, 1 = data
cc = 0
while 1:
tok = self.nxt_tok()
if tok==None:
break
if tok=='}':
break
if tok==':::':
if not mode:
mode = 1
n_fld = len(stk)
c = 0
for a in stk: # create row index for the array
dct[a] = c
data.append([]) # add row for each field
coer.append(coerce[a[0]])
c = c + 1
elif not mode:
stk.append(tok)
else: # here we actually read the array
self.push_tok(tok)
for cc in range(n_rec):
tok = self.nxt_tok() # chuck index
if tok==':::': # truncated/incomplete array
break
c = 0
for c in range(n_fld):
tok = self.nxt_tok()
if tok==None:
break
data[c].append(coer[c](*(tok,)))
if tok=='}':
break
return (n_rec,dct,data) # return a tuple
def parse_m_ct(self):
dct = {}
stk = [] # keyword stack
mode = 0 # 0 = definition, 1 = data
while 1:
tok = self.nxt_tok()
if tok==None:
break
if tok==':::':
mode = 1
elif mode:
if not len(stk):
mode = 0
self.push_tok(tok) # go around
else:
dct[stk.pop(0)] = tok
else:
arm = array_re.findall(tok)
if len(arm):
arm = arm[0]
n_rec=int(arm[1])
if arm[0] in ['m_atom','m_bond']:
self.nxt_tok() # skip '{'
dct[arm[0]]=self.parse_array(n_rec)
else:
stk.append(tok)
if tok=='}':
break
return dct
def parse(self):
while 1:
tok = self.nxt_tok()
if tok==None:
break
if tok=='{':
self.d.append('top',self.parse_top())
elif tok in ['f_m_ct','p_m_ct']:
self.nxt_tok() # skip '{'
self.d.append(tok,self.parse_m_ct())
return self.d
class MAE(Storage):
def _read_m_atom(self,m_atom,model):
ma = model.atom
at_ent = m_atom[1]
at_dat = m_atom[2]
nAtom = m_atom[0]
if 'i_m_mmod_type' in at_ent:
a1 = at_dat[at_ent['i_m_mmod_type']]
for b in range(nAtom):
nt = a1[b]
ma[b].numeric_type = nt
ma[b].symbol = MMOD_atom_data[nt][1]
ma[b].text_type = MMOD_atom_data[nt][0]
if 'r_m_x_coord' in at_ent and \
'r_m_y_coord' in at_ent and \
'r_m_z_coord' in at_ent:
a1 = at_dat[at_ent['r_m_x_coord']]
a2 = at_dat[at_ent['r_m_y_coord']]
a3 = at_dat[at_ent['r_m_z_coord']]
for b in range(nAtom):
ma[b].coord = [a1[b],a2[b],a3[b] ]
if 'i_m_residue_number' in at_ent:
a1 = at_dat[at_ent['i_m_residue_number']]
for b in range(nAtom):
resi = a1[b]
ma[b].resi = str(resi)
ma[b].resi_number = resi
if 's_m_mmod_res' in at_ent:
a1 = at_dat[at_ent['s_m_mmod_res']]
for b in range(nAtom):
ma[b].resi_code = a1[b]
if 's_m_chain_name' in at_ent:
a1 = at_dat[at_ent['s_m_chain_name']]
for b in range(nAtom):
ma[b].chain = a1[b]
if 'i_m_color' in at_ent:
a1 = at_dat[at_ent['i_m_color']]
for b in range(nAtom):
ma[b].color_code = a1[b]
if 'r_m_charge1' in at_ent:
a1 = at_dat[at_ent['r_m_charge1']]
for b in range(nAtom):
ma[b].partial_charge = a1[b]
if 's_m_pdb_residue_name' in at_ent:
a1 = at_dat[at_ent['s_m_pdb_residue_name']]
for b in range(nAtom):
resn = a1[b].strip()
if len(resn):
ma[b].resn = resn
if 'i_m_formal_charge' in at_ent:
a1 = at_dat[at_ent['i_m_formal_charge']]
for b in range(nAtom):
ma[b].formal_charge = a1[b]
if 's_m_atom_name' in at_ent:
a1 = at_dat[at_ent['s_m_atom_name']]
for b in range(nAtom):
nam = a1[b].strip()
if len(nam):
ma[b].name = nam
if 's_m_pdb_atom_name' in at_ent:
a1 = at_dat[at_ent['s_m_pdb_atom_name']]
for b in range(nAtom):
nam = a1[b].strip()
if len(nam):
ma[b].name = nam
def _read_m_bond(self,m_bond,model):
bd_ent = m_bond[1]
bd_dat = m_bond[2]
nBond = m_bond[0]
if len(bd_dat[0]): # not empty right?
if 'i_m_from' in bd_ent and \
'i_m_to' in bd_ent and \
'i_m_order' in bd_ent:
a1 = bd_dat[bd_ent['i_m_from']]
a2 = bd_dat[bd_ent['i_m_to']]
a3 = bd_dat[bd_ent['i_m_order']]
for b in range(nBond):
bd1 = a1[b] - 1
bd2 = a2[b] - 1
bd3 = a3[b]
if bd1<bd2:
bnd = Bond()
bnd.index = [ bd1,bd2 ]
bnd.order = bd3
model.bond.append(bnd)
#---------------------------------------------------------------------------------
def fromList(self,MMODList): # returns a list of indexed models
mp = MAEParser(lst=MMODList)
mp_rec = mp.parse()
full_model = None
result = []
for mp_ent in mp_rec:
if mp_ent[0] == 'f_m_ct':
f_m_ct = mp_ent[1]
model = Indexed()
if 's_m_title' in f_m_ct:
model.molecule.title = f_m_ct['s_m_title'].strip()
if 'm_atom' in f_m_ct:
m_atom = f_m_ct['m_atom']
nAtom = m_atom[0]
for a in range(nAtom):
model.atom.append(Atom())
self._read_m_atom(m_atom,model)
if 'm_bond' in f_m_ct:
m_bond = f_m_ct['m_bond']
self._read_m_bond(m_bond,model)
full_model = model
result.append(model)
elif mp_ent[0]=='p_m_ct' and full_model!=None:
model = copy.deepcopy(full_model)
f_m_ct = mp_ent[1]
if 's_m_title' in f_m_ct:
model.molecule.title = f_m_ct['s_m_title'].strip()
if 'm_atom' in f_m_ct:
m_atom = f_m_ct['m_atom']
nAtom = m_atom[0]
self._read_m_atom(m_atom,model)
if 'm_bond' in f_m_ct:
m_bond = f_m_ct['m_bond']
self._read_m_bond(m_bond,model)
full_model = model
result.append(model)
return result
#---------------------------------------------------------------------------------
'#Ntype Atype Elem Hybr Att Chg\n',
MMOD_atom_data = {
1: ['C1','C' ,'sp' , 2, 0],
2: ['C2','C' ,'sp2', 3, 0],
3: ['C3','C' ,'sp3', 4, 0],
4: ['CA','C' ,'sp3', 3, 0],
5: ['CB','C' ,'sp3', 2, 0],
6: ['CC','C' ,'sp3', 1, 0],
7: ['CD','C' ,'sp2', 2, 0],
8: ['CE','C' ,'sp2', 1, 0],
9: ['CF','C' ,'sp' , 1, 0],
10: ['CM','C' ,'unk',-1,-1],
11: ['CP','C' ,'unk',-1, 1],
12: ['CR','C' ,'unk',-1, 0],
14: ['C0','C' ,'unk',-1, 0],
15: ['O2','O' ,'sp2', 1, 0],
16: ['O3','O' ,'sp3', 2, 0],
17: ['OA','O' ,'sp3', 1, 0],
18: ['OM','O' ,'sp3', 1,-1],
19: ['OW','O' ,'sp3', 0, 0],
20: ['OP','O' ,'sp2', 2, 1],
21: ['OQ','O' ,'sp3', 3, 1],
23: ['O0','O' ,'unk',-1, 0],
24: ['N1','N' ,'sp' , 1, 0],
25: ['N2','N' ,'sp2', 2, 0],
26: ['N3','N' ,'sp3', 3, 0],
27: ['NA','N' ,'sp3', 2, 0],
28: ['NB','N' ,'sp3', 1, 0],
29: ['NC','N' ,'sp3', 0, 0],
30: ['ND','N' ,'sp2', 1, 0],
31: ['N4','N' ,'sp2', 3, 1],
32: ['N5','N' ,'sp3', 4, 1],
33: ['NE','N' ,'sp3', 3, 1],
34: ['NF','N' ,'sp3', 2, 1],
35: ['NG','N' ,'sp3', 1, 1],
36: ['NH','N' ,'sp2', 2, 1],
37: ['NI','N' ,'sp2', 1, 1],
40: ['N0','N' ,'unk',-1, 0],
41: ['H1','H' ,'s' , 1, 0],
42: ['H2','H' ,'s' , 1, 0],
43: ['H3','H' ,'s' , 1, 0],
44: ['H4','H' ,'s' , 0, 0],
45: ['H5','H' ,'s' , 0, 0],
48: ['H0','H' ,'s' ,-1, 0],
49: ['S1','S' ,'sp3', 2, 0],
50: ['SA','S' ,'sp3', 1, 0],
51: ['SM','S' ,'sp3', 0,-1],
52: ['S0','S' ,'unk',-1, 0],
53: ['P0','P' ,'unk',-1, 0],
54: ['B2','B' ,'sp2', 2, 0],
55: ['B3','B' ,'sp3', 3, 0],
56: ['F0','F' ,'sp3', 1, 0],
57: ['Cl','Cl','sp3', 1, 0],
58: ['Br','Br','sp3', 1, 0],
59: ['I0','I' ,'sp3', 1, 0],
60: ['Si','Si','unk',-1, 0],
61: ['Du','Du','unk',-1, 0],
62: ['Du','Du','unk',-1, 0],
63: ['Lp','Lp','unk', 1, 0],
64: ['Du','Du','unk',-1, 0]};