def process_file()

in packages/miew/tools/pdbfreq.py [0:0]


def process_file(filename):
  """Tally record tags and selected column values for one file.

  The file is read line by line. Columns 1-6 of each line (stripped and
  upper-cased) form the record tag. A ``REMARK`` line whose columns 8-10
  begin with digits and whose column 11 is a space is tagged
  ``REMARK <num>`` instead. Every tag occurrence is counted in ``stats``.

  For tags listed in the module-level ``tag_props`` table, each configured
  ``(left, right)`` 1-based column range is sliced out of the line; the
  value is recorded in ``stats_props`` and, together with the file's base
  name without extension, in the module-level ``tag_stats``.

  Besides ``tag_props`` and ``tag_stats`` this function reads the
  module-level ``resNames``, ``resNamesNucleic``, ``resNamesAmino`` and
  ``elements`` collections, all defined outside this function.

  Args:
    filename: path of the file to read.

  Returns:
    None. A ``(basename, stats, stats_props)`` tuple is appended to the
    module-level ``files`` list.
  """
  # Parenthesised single-argument form prints identically under Python 2
  # and is also valid Python 3.
  print('Processing %s...' % filename)
  base_name = os.path.basename(filename)
  # Loop-invariant: computed once instead of per property per line.
  file_id = os.path.splitext(base_name)[0]
  stats = defaultdict(int)
  stats_props = defaultdict(set)
  with open(filename) as f:
    for line in f:
      tag = line[0:6].strip().upper()
      if tag == 'REMARK':
        num = line[7:10].strip()
        # Slice rather than index: a REMARK line shorter than 11 characters
        # yields '' here instead of raising IndexError, and is counted as a
        # plain REMARK.
        if line[10:11] == ' ' and re.match(r'\d+', num):
          tag = '%s %s' % (tag, num)
      stats[tag] += 1

      if tag in tag_props:
        for key, (left, right) in tag_props[tag].items():
          # Column ranges are 1-based and inclusive.
          value = line[left-1:right]
          tag_stats[tag][key][value].add(file_id)
          stats_props['%s %s' % (tag, key)].add(value)
          if key == 'resName':
            # Pick the residue-class bucket, if any, for this value.
            suffix = None
            if tag == 'ATOM':
              if value not in resNames:
                suffix = 'OTHER'
              elif value in resNamesNucleic:
                suffix = 'NUCLEIC'
              else:
                suffix = 'AMINO'
            elif tag == 'HETATM' and value in resNames:
              if value in resNamesNucleic:
                suffix = 'NUCLEIC'
              elif value in resNamesAmino:
                suffix = 'AMINO'
            if suffix is not None:
              stats_props['%s %s_%s' % (tag, key, suffix)].add(value)
          elif key == 'element':
            # Count only element symbols missing from the `elements` table.
            symbol = value.strip()
            if symbol not in elements:
              stats['%s %s_%s' % (tag, key, symbol)] += 1
          elif key == 'charge':
            # Count only non-blank charge fields.
            charge = value.strip()
            if charge:
              stats['%s %s_%s' % (tag, key, charge)] += 1

      if tag == 'COMPND':
        content = line[10:]
        if re.match(r'\s*MOL_ID: ', content):
          stats['COMPND MOL_ID'] += 1

      if tag.startswith('REMARK '):
        content = line[11:]
        if tag == 'REMARK 350':
          if re.match(r'\s*BIOMT\d ', content):
            stats['REMARK 350 BIOMTn'] += 1
          elif re.match(r'\s*BIOMOLECULE:', content):
            stats['REMARK 350 BIOMOLECULE'] += 1
        elif tag == 'REMARK 290':
          if re.match(r'\s*SMTRY\d ', content):
            stats['REMARK 290 SMTRYn'] += 1

  files.append((base_name, stats, stats_props))