def variant_annotation()

in src/main/python/vcf_snpeff_annotation.py [0:0]


def _annotation_result(transcript, fields):
	"""Build the result tuple from a transcript name and its stored field list.

	`fields` is the 10-item list stored per transcript by variant_annotation:
	[score1, score2, strand, gene, consequence_type, region, variant_type,
	dna, cdna, pro]. The scores and `dna` are not part of the result.
	"""
	_score1, _score2, strand, gene, consequence_type, region, variant_type, _dna, cdna, pro = fields
	return (strand, gene, transcript, consequence_type, region, variant_type, cdna, pro)


def _select_annotation(variant, candidates, longest_transcripts):
	"""Choose one annotation among `candidates` (transcript -> field list).

	Candidates are ranked ascending by (score1, score2, transcript name) and
	the first one is the default choice. When there is more than one
	candidate, every candidate tied with the best on both scores is looked up
	in `longest_transcripts` under the key variant + '_' + gene + '_' +
	transcript (in ranked order); the first hit's stored value is returned
	as-is. Otherwise the default choice is returned as a result tuple.

	Raises IndexError when `candidates` is empty.
	"""
	if not candidates:
		raise IndexError('no annotations available for variant {}'.format(variant))

	ranked = sorted(candidates.items(), key=lambda kv: (kv[1][0], kv[1][1], kv[0]))
	best_transcript, best_fields = ranked[0]

	# A lone candidate is returned directly, without consulting
	# longest_transcripts.
	if len(ranked) > 1:
		for transcript, fields in ranked:
			if fields[0] == best_fields[0] and fields[1] == best_fields[1]:
				lookup_key = variant + '_' + fields[3] + '_' + transcript
				if lookup_key in longest_transcripts:
					return longest_transcripts[lookup_key]

	return _annotation_result(best_transcript, best_fields)


def variant_annotation(variant,annotates,prefer_transcripts,longest_transcripts):
	"""Reduce all annotations of one variant to a single annotation.

	Parameters:
		variant: variant identifier; concatenated with '_' to build lookup
			keys, so it must be a string.
		annotates: iterable of 6-item records
			(transcript, gene, strand, coordinate, region, info).
			`coordinate` must split on '/' into exactly three parts
			(dna, cdna, pro). When the transcript field contains exactly one
			'(', the text before it minus its last character is used as the
			transcript name and the text after it minus its last character as
			the consequence type (presumably the form 'NAME (consequence)' --
			verify against the caller); otherwise both are set to '.'.
		prefer_transcripts: container of preferred transcript names
			(tested with `in`).
		longest_transcripts: mapping from variant + '_' + gene + '_' +
			transcript to a ready-made result, used to break score ties.

	Selection rules:
		* exactly one annotated transcript is preferred -> that annotation;
		* several are preferred -> best-ranked among the preferred ones;
		* none is preferred -> best-ranked among all annotations.
		Ranking and tie-breaking are described in _select_annotation.

	Returns:
		(strand, gene, transcript, consequence_type, region, variant_type,
		cdna, pro), or the value stored in `longest_transcripts` when a tie
		is resolved through it.

	Raises:
		IndexError: if `annotates` yields no records.
	"""
	keep_one_anno = dict()
	for transcript_field, gene, strand, coordinate, region, info in annotates:
		parts = transcript_field.split('(')
		if len(parts) == 2:
			transcript, consequence_type = parts[0][:-1], parts[1][:-1]
		else:
			transcript, consequence_type = '.', '.'
		dna, cdna, pro = coordinate.split('/')
		variant_type = variantType(region, dna, cdna, pro, info)
		score1, score2 = variantScore(transcript, consequence_type, region, variant_type, dna, cdna, pro)
		# Later records for the same transcript name overwrite earlier ones.
		keep_one_anno[transcript] = [score1, score2, strand, gene, consequence_type, region, variant_type, dna, cdna, pro]

	preferred = {
		transcript: fields
		for transcript, fields in keep_one_anno.items()
		if transcript in prefer_transcripts
	}

	if len(preferred) == 1:
		# Report the preferred transcript itself; a leftover loop variable
		# here would pair the wrong transcript name with these fields.
		(transcript, fields), = preferred.items()
		return _annotation_result(transcript, fields)

	# Several preferred transcripts: choose among them. None: choose among all.
	return _select_annotation(variant, preferred or keep_one_anno, longest_transcripts)