def transvar_output()

in src/main/python/vcf_annovar_annotation.py [0:0]


def transvar_output(sample,passfile,transvar_annotation,output):
	"""Write a tab-separated summary of the annotated variants found in a VCF.

	The VCF is read line by line. The "#CHROM" header line is used to locate
	the columns, and a fixed header line is written to the output. For every
	data line whose key "<chrom>_<pos>_<ref>_<alt>" is present in
	``transvar_annotation``, genotype, coverage, allele frequency and mutation
	status are collected from the INFO and FORMAT/sample columns. A row is
	written only when the status is unknown ("-") or starts/ends with
	"SOMATIC".

	Args:
		sample: Sample name; written to the SAMPLE column and used to find the
			sample column in the header (columns named "SAMPLE" or "TUMOR",
			case-insensitively, are accepted as well; the last match wins).
		passfile: Path of the VCF file to read.
		transvar_annotation: Mapping from "<chrom>_<pos>_<ref>_<alt>" to a
			sequence of annotation strings appended to the output row.
		output: Path of the tab-separated file to write (overwritten).

	Returns:
		None. The result is written to ``output``.
	"""
	header_line = "#CHROM\tPOS\tREF\tALT\tSAMPLE\tGENOTYPE\tCOVERAGE\tAllele_FQ\tMUT_STATUS\tSTRAND\tGENE_SYMBOL\tENSEMBL_TRANSCRIPT\tCONSEQUENCE_TYPE\tVARIANT_REGION\tVARIANT_TYPE\tcDNA_CHANGE\tAA_CHANGE\n"
	## Column positions; None until the "#CHROM" header line has been seen.
	chrom_index = position_index = ref_index = alt_index = None
	info_index = format_index = sample_index = None
	## currently supports vcf>=4.0
	## The output is opened first so it is created even if the VCF is missing.
	with open(output,'w') as tmp, open(passfile) as vcf:
		for line in vcf:
			if line.startswith("#CHROM"):
				tmp.write(header_line)
				for i,name in enumerate(line.rstrip().split("\t")):
					if name=="#CHROM":
						chrom_index = i
					elif name=="POS":
						position_index = i
					elif name=="REF":
						ref_index = i
					elif name=="ALT":
						alt_index = i
					elif name=="INFO":
						info_index = i
					elif name=="FORMAT":
						format_index = i
					elif name==sample or name.upper() in ("SAMPLE","TUMOR"):
						sample_index = i
				continue
			## Skip meta lines and blank lines.
			if line.startswith("#") or not line.strip():
				continue

			## selectively output some variables in vcf file
			## chr,pos,ref,alt,sample,gt,dp,af
			data = line.rstrip().split("\t")
			variant = data[chrom_index]+"_"+data[position_index]+"_"+data[ref_index]+"_"+data[alt_index]
			if variant not in transvar_annotation:
				continue

			af,cov,ad,gt,status='-',0,0,'-','-'
			if info_index is not None:
				for inf in data[info_index].split(";"):
					parts = inf.split("=")
					if len(parts)==2:
						param,val = parts
						if param=="AF":
							af=float(val)
						if param=="DP":
							cov=val
						if param=="STATUS":
							status=val.upper()
					elif len(parts)==1 and inf.upper()=="SOMATIC":
						status=inf.upper()

			if format_index is not None:
				keys = data[format_index].split(":")
				sample_values = data[sample_index].split(":")
				## zip() ignores FORMAT keys that have no matching sample value
				## (trailing sample fields may be dropped).
				for key,val in zip(keys,sample_values):
					if key=="GT":
						gt=val
					## Sample-level depth/frequency is only used while no
					## allele frequency has been found yet.
					if af=='-':
						if key=="DP":
							cov=val
						if key=="AF":
							af=float(val)
						if key=="AO":
							ad=val
						if key=="AD":
							depths = val.split(",")
							ad=depths[1]
							if cov==0:
								cov=str(int(depths[0])+int(depths[1]))
						if key==data[alt_index]+"U":  ## Specific to strelka2 SNV vcf output
							ad=val.split(",")[0]
						if key=="TIR":                ## Specific to strelka2 INDEL vcf output
							ad=val.split(",")[0]

				## Derive the frequency from read counts; leave it as '-' when
				## the depth is zero so the division cannot fail.
				if af=='-' and int(ad)>0 and float(cov)>0:
					af=float(ad)/float(cov)

			if status.endswith("SOMATIC") or status.startswith("SOMATIC") or status=="-":
				af_text = af if af=='-' else "%.3f" % af
				tmp.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
					% (data[chrom_index],data[position_index],data[ref_index],data[alt_index],\
					sample,gt,cov,af_text,status,'\t'.join(transvar_annotation[variant])))