in clns-eTarget_ingest/targetdata.py [0:0]
def processGDLData(self, filename, xmlString):
# Parse the xml string into a usable format
try:
# Get the patient data
root = ET.fromstring(xmlString)
assert root.find('.//identifier[@type="Your ref"]') is not None, 'identifier "Your ref" missing'
christieNo = root.find('.//identifier[@type="Your ref"]').text
assert root.find('.//identifier[@type="Our ref"]') is not None, 'identifier "Our ref" missing'
reportRef = root.find('.//identifier[@type="Our ref"]').text
assert root.find('.//submitted') is not None, 'report submitted date missing'
reportDate = root.find('.//submitted').text
assert root.find('.//activated') is not None, 'report activated date missing'
gdlDate = root.find('.//activated').text
reportDateFormatted = datetime.datetime.strptime(reportDate, '%d-%b-%Y').strftime('%Y-%m-%d')
assert root.find('.//identifier') is not None, "Path lab can't be missing or empty"
pathLabRef = root.find('.//identifier[@type="Path lab"]').text
coverage = 0
if root.find('.//coverage') is not None:
coverage = root.find('.//coverage').text
coverage = coverage.replace("%", "")
comments = 'n/a'
if root.find('.//comments') is not None:
comments = root.find('.//comments').text
comment = 'n/a'
if root.find('.//comment') is not None:
comment = root.find('.//comment').text
# Update the database, measurement_gene_panel table
cursor = self.conn.cursor()
# Get the specimen ID for this patient
hash_object = hashlib.sha256(christieNo.encode('utf-8'))
hex_dig = hash_object.hexdigest()
print("hex_dig " + str(hex_dig));
#gdlSQL = "SELECT specimen.specimen_id FROM SPECIMEN AS specimen, PERSON AS person WHERE person.hospital_number_hash = '"+hex_dig+"'"
gdlSQL = """SELECT TOP 1 specimen.specimen_id FROM SPECIMEN AS specimen left join PERSON AS person on specimen.person_id= person.person_id
WHERE specimen.person_id = (SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s) AND (specimen.specimen_concept_id = 2 OR specimen.specimen_concept_id = 3)
AND dbo.RemoveNonAlphaNumChars(tumour_id) like dbo.RemoveNonAlphaNumChars(%s) order by specimen.specimen_id asc"""
cursor.execute(gdlSQL, (hex_dig, pathLabRef))
#print(gdlSQL)
specimenID = 0
for row in cursor:
specimenID = row[0]
print('SPECIMEN ID IS: '+str(specimenID))
if specimenID == 0:
# No specimen found
self.log.logMessage('No specimen ID found for '+filename)
raise Exception('No specimen ID found.')
else:
#check resubmission tumour
resubmission,prev_gene_panel_id=self.checkResubmissionTumour(specimenID, 1, 1)
print("resubmission: " + str(resubmission) + "prev_gene_panel: " + str(prev_gene_panel_id))
updateSelection=False
if resubmission==1:
self.deleteOldTumourReport(prev_gene_panel_id);
self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), "Resubmit of file -- delete old content")
self.log.logMessage(filename + ' Resubmit of file -- delete old content')
if resubmission==2:
updateSelection=True
#find the latest GDL panel id
cursor = self.conn.cursor()
genePanelSQL= "select top 1 data_source_concept_id from CONCEPT_DATA_SOURCES where valid_start_date<%s order by valid_start_date desc"
cursor.execute(genePanelSQL, (reportDateFormatted))
row=cursor.fetchone()
if row is not None:
data_source_concept_id=row[0]
else:
data_source_concept_id=2
# Update MEASUREMENT_GENE_PANEL
cursor = self.conn.cursor()
gdlSQL = """INSERT INTO MEASUREMENT_GENE_PANEL (specimen_id, data_source_concept_id, coverage, average_read_depth, ngs_run, analysis_failed, no_mutations_found, run_number, baseline_number, comments, unknown_significance)
VALUES(%s, %s, %s, 'n/a', 'GDL', 'false', 'false', 1, 1, %s, %s)"""
#print(gdlSQL)
cursor.execute(gdlSQL, (specimenID, data_source_concept_id, coverage, comments, comment))
measurement_gene_panel_id = cursor.lastrowid
#print(gdlSQL)
# Update REPORT
cursor = self.conn.cursor()
reportFilename = reportRef+'.pdf'
reportSQL = "INSERT INTO REPORT (date_issued, measurement_gene_panel_id, filename) VALUES(%s, %d, %s)"
cursor.execute(reportSQL, (reportDateFormatted, measurement_gene_panel_id, reportFilename))
#print(reportSQL)
# Update PATH_LAB_REF
cursor = self.conn.cursor()
pathSQL = "INSERT INTO PATH_LAB_REF (path_lab_ref, specimen_id, measurement_gene_panel_id) VALUES(%s, %d, %d)"
#print(pathSQL)
cursor.execute(pathSQL, (pathLabRef, specimenID, measurement_gene_panel_id))
# Update GDL_REQUEST
cursor = self.conn.cursor()
gdlDateFormatted = datetime.datetime.strptime(gdlDate, '%d-%b-%Y').strftime('%Y-%m-%d')
#print(gdlDate)
#print(gdlDateFormatted)
#reportSQL = "INSERT INTO GDL_REQUEST (date_requested, path_lab_ref, specimen_id, sample_type) VALUES('"+gdlDateFormatted+"', '"+pathLabRef+"', '"+str(specimenID)+"', '')"
#reportSQL = "UPDATE GDL_REQUEST SET date_requested='"+gdlDateFormatted+"', path_lab_ref='"+pathLabRef+"' WHERE specimen_id='"+str(specimenID)+"'"
#print(reportSQL)
#cursor.execute(reportSQL)
# Get the gene results data and update MEASUREMENT_GENE_VARIANT
geneCount = 0
try:
insertSGVarguments=[]
for gene in root.iter(tag = 'gene'):
geneName = gene.attrib['name']
for variant in gene[0]:
cdna = variant[0].text
if variant.find('protein') is not None:
amino = variant.find('protein').text
else:
amino = ""
if variant.find('reads') is not None:
reads = variant.find('reads').text
reads = reads.replace("%", "")
else:
reads = ""
cursor = self.conn.cursor()
gdlSQL = "INSERT INTO MEASUREMENT_GENE_VARIANT (measurement_gene_panel_id, gene_concept_id, cdna_change, amino_acid_change, variant_allele_frequency) VALUES(%d, (SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = %s), %s, %s, %s)"
cursor.execute(gdlSQL,(measurement_gene_panel_id, geneName, cdna, amino, reads))
measurement_gene_variant_id = cursor.lastrowid
# new version of old tumour report keep selections
if updateSelection == True:
findPreviouseSelectionQuery="select person_id from SELECTED_GENE_VARIANT FULL OUTER JOIN MEASUREMENT_GENE_VARIANT on MEASUREMENT_GENE_VARIANT.measurement_gene_variant_id=SELECTED_GENE_VARIANT.measurement_gene_variant_id FULL OUTER JOIN MEASUREMENT_GENE_PANEL on MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=MEASUREMENT_GENE_VARIANT.measurement_gene_panel_id where person_id=(SELECT person_id FROM PERSON WHERE person.hospital_number_hash = '"+hex_dig+"') and type='NGS' and baseline_number=1 and run_number=1 and MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=" +str(prev_gene_panel_id)+ " and gene_concept_id=(SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = '"+str(geneName)+"') and amino_acid_change = '"+str(amino)+"' and cdna_change ='"+str(cdna)+"';"
findPreviousSelectionCursor = self.conn.cursor()
findPreviousSelectionCursor.execute(findPreviouseSelectionQuery)
row=findPreviousSelectionCursor.fetchone()
findPreviousSelectionCursor.close()
if row is not None:
#was previously selected
insertSGVarguments.append((hex_dig, measurement_gene_variant_id))
insertSelectionQuery="insert into SELECTED_GENE_VARIANT (person_id, measurement_gene_variant_id, type) values((SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s),%d,'NGS');"
if len(insertSGVarguments)>0:
insertSelectionCursor = self.conn.cursor()
insertSelectionCursor.executemany(insertSelectionQuery, insertSGVarguments)
insertSelectionCursor.close()
except Exception as e:
print(str(e))
print('Error importing genes.')
self.log.logMessage('GDL -- Error importing genes.' +str(e))
raise e
# Delete the copies
self.log.logMessage('Deleting GDL xml data file '+filename)
try:
self.conn.commit()
if self.file_service.exists(self.data, None, filename):
self.file_service.delete_file(self.data, None, filename)
self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Success')
except:
self.log.logMessage('There was a problem deleting '+filename)
except Exception as e:
self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Error: XML file could not be imported.')
self.conn.rollback()
print(str(e))
return 0