in packages/miew/tools/pdbfreq.py [0:0]
def process_file(filename):
    """Collect record-tag and field-value statistics from one text file.

    Every line is classified by the upper-cased, stripped text of its first
    six characters (its "tag").  ``REMARK`` lines that carry a number in
    columns 8-10 followed by a blank in column 11 are counted under
    ``'REMARK <num>'`` instead of plain ``'REMARK'``.

    For tags present in the module-level ``tag_props`` mapping, each
    configured field is cut out of the line by its 1-based inclusive column
    range ``(left, right)`` and recorded:

    * in the module-level ``tag_stats[tag][key][value]`` set, together with
      the file's base name without extension;
    * in the per-file ``stats_props['<tag> <key>']`` set;
    * for ``resName`` / ``element`` / ``charge`` fields, in additional
      per-file buckets and counters (see inline comments).

    Args:
        filename: Path of the file to read.

    Returns:
        None.  A ``(basename, stats, stats_props)`` tuple is appended to the
        module-level ``files`` list, where ``stats`` maps a label to a count
        and ``stats_props`` maps a label to the set of values seen.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # The parenthesised single-argument form prints the same text under both
    # the Python 2 print statement and the Python 3 print function.
    print('Processing %s...' % filename)

    stats = defaultdict(int)
    stats_props = defaultdict(set)
    base_name = os.path.basename(filename)
    # Loop-invariant: identifies this file inside the shared tag_stats sets.
    file_id = os.path.splitext(base_name)[0]

    with open(filename) as f:
        for line in f:
            tag = line[0:6].strip().upper()
            if tag == 'REMARK':
                num = line[7:10].strip()
                # Slice rather than index: a REMARK line shorter than 11
                # characters must not raise IndexError; it simply stays
                # counted under the plain 'REMARK' tag.
                if line[10:11] == ' ' and re.match(r'\d+', num):
                    tag = '%s %s' % (tag, num)
            stats[tag] += 1

            if tag in tag_props:
                for key, (left, right) in tag_props[tag].items():
                    # Column ranges are 1-based and inclusive.
                    value = line[left - 1:right]
                    prop_name = '%s %s' % (tag, key)
                    tag_stats[tag][key][value].add(file_id)
                    stats_props[prop_name].add(value)

                    if key == 'resName':
                        if tag == 'ATOM':
                            # Every ATOM residue name lands in exactly one
                            # of the OTHER / NUCLEIC / AMINO buckets.
                            if value not in resNames:
                                stats_props[prop_name + '_OTHER'].add(value)
                            elif value in resNamesNucleic:
                                stats_props[prop_name + '_NUCLEIC'].add(value)
                            else:
                                stats_props[prop_name + '_AMINO'].add(value)
                        elif tag == 'HETATM' and value in resNames:
                            # Only HETATM residues with known names are
                            # bucketed; unknown ones are not recorded here.
                            if value in resNamesNucleic:
                                stats_props[prop_name + '_NUCLEIC'].add(value)
                            elif value in resNamesAmino:
                                stats_props[prop_name + '_AMINO'].add(value)
                    elif key == 'element':
                        # Count only symbols missing from `elements`.
                        symbol = value.strip()
                        if symbol not in elements:
                            stats['%s_%s' % (prop_name, symbol)] += 1
                    elif key == 'charge':
                        # Count only non-blank charge fields.
                        charge = value.strip()
                        if charge:
                            stats['%s_%s' % (prop_name, charge)] += 1

            if tag == 'COMPND':
                content = line[10:]
                if re.match(r'\s*MOL_ID: ', content):
                    stats['COMPND MOL_ID'] += 1

            if tag.startswith('REMARK '):
                content = line[11:]
                if tag == 'REMARK 350':
                    if re.match(r'\s*BIOMT\d ', content):
                        stats['REMARK 350 BIOMTn'] += 1
                    elif re.match(r'\s*BIOMOLECULE:', content):
                        stats['REMARK 350 BIOMOLECULE'] += 1
                if tag == 'REMARK 290':
                    if re.match(r'\s*SMTRY\d ', content):
                        stats['REMARK 290 SMTRYn'] += 1

    files.append((base_name, stats, stats_props))