in src/main/python/vcf_snpeff_annotation.py [0:0]
def variant_annotation(variant, annotates, prefer_transcripts, longest_transcripts):
    """Reduce the annotations of one variant to a single record.

    Each entry of ``annotates`` is unpacked as
    ``(transcript, gene, strand, coordinate, region, info)``.  The transcript
    field is split on ``'('``: when that yields exactly two parts, the last
    character of each part is dropped to obtain the transcript id and the
    consequence type; otherwise both are set to ``'.'``.  ``coordinate`` is
    split on ``'/'`` into ``dna``, ``cdna`` and ``pro``.  Every annotation is
    then typed with ``variantType`` and scored with ``variantScore``.

    Selection rules:

    * If at least one annotated transcript is in ``prefer_transcripts``, only
      those transcripts are candidates; otherwise all of them are.
    * A single candidate is returned directly.
    * With several candidates, they are ranked ascending by
      ``(score1, score2, transcript)``.  Among the candidates tied with the
      best ``(score1, score2)``, the first whose key
      ``variant + '_' + gene + '_' + transcript`` is present in
      ``longest_transcripts`` wins and the stored value is returned as-is.
      If none is present, the best-ranked candidate is returned.

    Args:
        variant: Variant identifier; concatenated with ``'_'`` to build the
            ``longest_transcripts`` lookup key, so it must be a ``str``.
        annotates: Iterable of 6-item annotation records (see above).
        prefer_transcripts: Container supporting ``in`` for transcript ids.
        longest_transcripts: Mapping from ``variant_gene_transcript`` keys to
            ready-made results (presumably tuples shaped like this function's
            own return value -- confirm against the code that builds it).

    Returns:
        ``(strand, gene, transcript, consequence_type, region, variant_type,
        cdna, pro)`` for the chosen annotation, or the value stored in
        ``longest_transcripts`` when a tied candidate is found there.

    Raises:
        IndexError: If ``annotates`` is empty.
        ValueError: If a record or its ``coordinate`` does not unpack into
            the expected number of fields.
    """
    # transcript -> [score1, score2, strand, gene, consequence_type,
    #                region, variant_type, dna, cdna, pro]
    # Annotations that resolve to the same transcript id overwrite each other.
    keep_one_anno = {}
    for raw_transcript, gene, strand, coordinate, region, info in annotates:
        parts = raw_transcript.split('(')
        if len(parts) == 2:
            transcript, consequence_type = parts[0][:-1], parts[1][:-1]
        else:
            transcript, consequence_type = '.', '.'
        dna, cdna, pro = coordinate.split('/')
        variant_type = variantType(region, dna, cdna, pro, info)
        score1, score2 = variantScore(
            transcript, consequence_type, region, variant_type, dna, cdna, pro
        )
        keep_one_anno[transcript] = [
            score1, score2, strand, gene, consequence_type,
            region, variant_type, dna, cdna, pro,
        ]

    # Preferred transcripts, when any are annotated, take precedence over
    # everything else.
    preferred = {
        transcript: values
        for transcript, values in keep_one_anno.items()
        if transcript in prefer_transcripts
    }
    candidates = preferred if preferred else keep_one_anno
    return _pick_annotation(variant, candidates, longest_transcripts)


def _pick_annotation(variant, candidates, longest_transcripts):
    """Return the winning annotation among ``candidates`` (transcript -> values)."""
    if not candidates:
        raise IndexError('no annotation available for variant %r' % (variant,))

    ranked = sorted(
        candidates.items(),
        key=lambda kv: (kv[1][0], kv[1][1], kv[0]),
    )
    best_transcript, best_values = ranked[0]

    # The longest-transcript lookup only arbitrates between several
    # candidates; a lone candidate is returned without consulting it.
    if len(ranked) > 1:
        best_score1, best_score2 = best_values[0], best_values[1]
        tied_keys = [
            variant + '_' + values[3] + '_' + transcript
            for transcript, values in ranked
            if values[0] == best_score1 and values[1] == best_score2
        ]
        for key in tied_keys:
            if key in longest_transcripts:
                return longest_transcripts[key]

    (_score1, _score2, strand, gene, consequence_type,
     region, variant_type, _dna, cdna, pro) = best_values
    # Use the transcript id that belongs to the selected values, not a
    # leftover loop variable.
    return (strand, gene, best_transcript, consequence_type,
            region, variant_type, cdna, pro)