in src/main/python/vcf_snpeff_annotation.py [0:0]
def _header_column_indexes(header_line, sample):
    """Return the positions of the columns named in a ``#CHROM`` header line.

    Only columns that are actually present are returned, keyed by "chrom",
    "pos", "ref", "alt", "info", "format" and "sample".  The sample column is
    the one whose name equals ``sample`` or is, case-insensitively, "SAMPLE"
    or "TUMOR"; when several columns qualify, the last one wins.
    """
    fixed_columns = {"#CHROM": "chrom", "POS": "pos", "REF": "ref",
                     "ALT": "alt", "INFO": "info", "FORMAT": "format"}
    found = {}
    for position, name in enumerate(header_line.rstrip().split("\t")):
        if name in fixed_columns:
            found[fixed_columns[name]] = position
        elif name == sample or name.upper() in ("SAMPLE", "TUMOR"):
            found["sample"] = position
    return found


def _parse_info(info_field):
    """Pull allele frequency, depth and status out of an INFO column.

    Returns ``(af, cov, status)``.  A value INFO does not supply keeps its
    placeholder: '-' for af and status, 0 for cov.  ``AF`` is converted with
    ``float`` and ``DP`` is kept as the original string.
    """
    af, cov, status = '-', 0, '-'
    for entry in info_field.split(";"):
        pieces = entry.split("=")
        if len(pieces) == 2:
            key, val = pieces
            if key == "AF":
                af = float(val)
            elif key == "DP":
                cov = val
            elif key == "STATUS":
                status = val.upper()
        elif len(pieces) == 1 and entry.upper() == "SOMATIC":
            ## bare flag without "=", e.g. "...;SOMATIC;..."
            status = entry.upper()
    return af, cov, status


def _parse_sample(format_field, sample_field, alt, af, cov):
    """Read genotype, depth and allele frequency from a FORMAT/sample pair.

    ``af`` and ``cov`` are the values already obtained from INFO.  While no
    allele frequency is known ('-'), DP/AF/AO/AD and the strelka2 tier fields
    are consulted; once an AF is known the remaining depth fields are ignored.
    If AF is still unknown after all fields were read it is derived as
    alt-depth / coverage.  Returns ``(gt, cov, af)``.
    """
    gt, ad = '-', 0
    ## zip() stops at the shorter list, so FORMAT keys that have no
    ## corresponding sample value are ignored instead of raising IndexError.
    for key, val in zip(format_field.split(":"), sample_field.split(":")):
        if key == "GT":
            gt = val
        elif af == '-':
            if key == "DP":
                cov = val
            elif key == "AF":
                af = float(val)
            elif key == "AO":
                ad = val
            elif key == "AD":
                depths = val.split(",")
                ad = depths[1]
                if cov == 0:
                    ## no DP seen so far: use ref depth + alt depth
                    cov = str(int(depths[0]) + int(depths[1]))
            elif key == alt + "U" or key == "TIR":
                ## <ALT>U is specific to strelka2 SNV output, TIR to
                ## strelka2 INDEL output; the first value is used
                ad = val.split(",")[0]
    if af == '-' and int(ad) > 0:
        depth = float(cov)
        ## a zero depth would divide by zero; leave AF as '-' instead
        if depth != 0:
            af = float(ad) / depth
    return gt, cov, af


def transvar_output(sample,tool,passfile,transvar_annotation,output):
    """Write a tab-separated table of annotated variants from a VCF file.

    ## currently supports vcf>=4.0

    Every data line of ``passfile`` whose key ``CHROM_POS_REF_ALT`` is present
    in ``transvar_annotation`` is written to ``output`` with a selection of
    VCF values (chr, pos, ref, alt, sample, gt, dp, af), its status, the
    detector name and the annotation columns.  Records whose status neither
    starts nor ends with "SOMATIC" and is not the placeholder '-' are dropped.

    Args:
        sample: sample name; used to find the sample column in the header and
            written to the SAMPLE column.
        tool: written to the MUT_DETECTOR column.
        passfile: path of the VCF file to read.
        transvar_annotation: mapping from ``CHROM_POS_REF_ALT`` to a sequence
            of strings that is appended, tab-joined, to the output row.
        output: path of the table to write (overwritten if it exists).
    """
    header_row = "#CHROM\tPOS\tREF\tALT\tSAMPLE\tGENOTYPE\tCOVERAGE\tAllele_FQ\tMUT_STATUS\tMUT_DETECTOR\tSTRAND\tGENE_SYMBOL\tENSEMBL_TRANSCRIPT\tCONSEQUENCE_TYPE\tVARIANT_REGION\tVARIANT_CLASSIFICATION\tcDNA_CHANGE\tAA_CHANGE\n"
    ## None marks a column that the header did not declare
    columns = dict.fromkeys(
        ("chrom", "pos", "ref", "alt", "info", "format", "sample"))

    ## context managers close both files even if a record fails to parse
    with open(output, 'w') as out, open(passfile) as vcf:
        for line in vcf:
            if line.startswith("#CHROM"):
                out.write(header_row)
                columns.update(_header_column_indexes(line, sample))
                continue
            ## skip meta-information lines and blank lines
            if line.startswith("#") or not line.strip():
                continue

            data = line.rstrip().split("\t")
            chrom = data[columns["chrom"]]
            pos = data[columns["pos"]]
            ref = data[columns["ref"]]
            alt = data[columns["alt"]]
            variant = chrom + "_" + pos + "_" + ref + "_" + alt
            if variant not in transvar_annotation:
                continue

            af, cov, gt, status = '-', 0, '-', '-'
            if columns["info"] is not None:
                af, cov, status = _parse_info(data[columns["info"]])
            if columns["format"] is not None:
                gt, cov, af = _parse_sample(
                    data[columns["format"]], data[columns["sample"]],
                    alt, af, cov)

            if not (status.endswith("SOMATIC") or status.startswith("SOMATIC")
                    or status == "-"):
                continue

            ## unknown AF is printed as '-', a known one with three decimals
            af_text = af if af == '-' else "%.3f" % af
            out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"
                      % (chrom, pos, ref, alt, sample, gt, cov, af_text,
                         status, tool,
                         '\t'.join(transvar_annotation[variant])))