clns-eTarget_ingest/targetdata.py

# # This file is part of the eTarget ingest distribution (https://github.com/digital-ECMT/eTarget_ingest). # Copyright (C) 2017 - 2021 digital ECMT # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # ''' Parse data from CSV files into the TARGET database. @author rob.dunne@manchester.ac.uk July 2017 0. Start: cronjob runs this 1. Check for files in the Samba network share: getNewFiles() 2. Loop over files: getNewFiles() 3. Process the file contents: processFile() 4. Update the database: updateDatabase() 5. Delete the file: processFile() 6. Repeat until no more files 7. End. ''' import csv import pymssql import os import sys import datetime from azure.storage.file import FileService from azure.storage.blob import AppendBlobService from azure.storage.blob import BlockBlobService from azure.storage.blob import ContentSettings import io import base64 from Crypto import Random from Crypto.Cipher import AES from ftpretty import ftpretty import xml.etree.ElementTree as ET import hashlib import foundationmedicine import clinicaldata import clinical_json import genomicdata_json import ihc_report import re import utilities from pycel.lib.lookup import row from fileinput import filename from openpyxl.compat.strings import file from azure.storage.file.models import File from builtins import str class TargetData(): # Constructor def __init__(self): self.filepath = os.path.dirname(os.path.abspath(__file__)) self.config = self.getConfig() self.AKEY = self.config['patientkey'] self.iv = Random.new().read(AES.block_size) self.file_service = FileService(account_name=self.config['fileuser'], account_key=self.config['filekey']) self.append_blob_service = AppendBlobService(account_name=self.config['fileuser'], account_key=self.config['filekey']) self.block_blob_service = BlockBlobService(account_name=self.config['fileuser'], account_key=self.config['filekey']) self.pdf_blob_service = BlockBlobService(connection_string=self.config['storageurl']) self.container_name = self.config['containername'] self.log = utilities.Util(self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'],self.config['logblob']) self.conn = pymssql.connect(self.config['remotehostname'], self.config['remoteusername'], self.config['remotepassword'], self.config['remotedbname'], autocommit=False) try: assert 'datadir' in self.config self.data=self.config['datadir'] except Exception as e: self.data='data' self.getNewFiles() def getNewFiles(self): # Get new files print('') processedFiles = 0 #files = os.listdir(self.filepath) files = self.file_service.list_directories_and_files(self.data) for file in files: try: # Filter the files if file.name.lower().endswith('.csv') and not file.name.startswith('.'): try: fileContent = self.file_service.get_file_to_text(self.data, None, file.name) except Exception as e: fileContent = self.file_service.get_file_to_text(self.data, None, file.name, encoding="cp1252") fileContentString = fileContent.content firstCell = fileContentString.split(',') if 'Patient_ID' in firstCell[0]: # Blood sample report self.log.logMessage('Processing blood report: '+file.name) self.processBloodFile(file.name) processedFiles = processedFiles+1 elif 'hospitalnumber' in firstCell[0].lower(): # Christie patient data self.log.logMessage('Processing Christie data: '+file.name) cd = clinicaldata.ClinicalData(file.name,self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'],self.data, self.config['logblob']) if cd.ingest()==0: cd.deleteFile() #self.processChristieFile(file.name) processedFiles = processedFiles+1 else: self.log.logMessage('Unknown CSV file: '+file.name) self.log.logMessage('Skipping file: '+file.name) if file.name.lower().endswith('.pdf') and not file.name.startswith('.'): pdf = self.file_service.get_file_to_bytes(self.data, None, file.name) with open(self.filepath+'/tmp/'+file.name, 'wb') as f: f.write(pdf.content) self.processPDFReport(file.name) if file.name.lower().endswith('.jpg') and not file.name.startswith('.'): self.processIHCPictures(file.name) if file.name.lower().endswith('.xml') and not file.name.startswith('.'): fileContent = self.file_service.get_file_to_text(self.data, None, file.name) fileContentString = fileContent.content if('foundationmedicine.com' in fileContentString): try: fm = foundationmedicine.FoundationMedicine(file.name,self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'], self.data, self.config['logblob']) fm.ingest() fm.deleteFile() except Exception as e: self.log.logMessage('Problems ingesting FM file ' + file.name + ' ' +str(e)) else: self.processGDLData(file.name, fileContentString) if file.name.lower().endswith('.xlsx') and not file.name.startswith('.'): #do something ihc = ihc_report.IHC_Report(file.name,self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'], self.data, self.config['logblob']) data=ihc.ingest() if data is not None: ihc.deleteFile() if file.name.lower().endswith('.json') and not file.name.startswith('.'): fileContent = self.file_service.get_file_to_text(self.data, None, file.name) fileContentString = fileContent.content if 'clinical' in fileContentString: cd = clinical_json.ClinicalDataJson(file.name,self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'],self.data, self.config['logblob']) if cd.ingest()==0: cd.deleteFile() #self.processChristieFile(file.name) processedFiles = processedFiles+1 elif 'genomic' in fileContentString: genomic = genomicdata_json.GenomicDataJson(file.name,self.config['remotehostname'], self.config['remoteusername'], \ self.config['remotepassword'], self.config['remotedbname'],self.config['fileuser'],self.config['filekey'],self.data, self.config['logblob']) if genomic.ingest()==0: genomic.deleteFile() processedFiles = processedFiles+1 else: self.log.logMessage('json file type not recognised ' + str(file)) self.log.systemStatusUpdate(file.name, 'unknown', self.timestamp(), 'Error: JSON file type not recognised') except Exception as e: self.log.logMessage('exception occurred ' + str(file) + " " + str(e)) if processedFiles == 0: self.log.logMessage('No files to process.') self.log.logMessage('File list:'+",".join(map(str, files))) def processBloodFile(self, filename): self.log.logMessage('Fetching file: '+filename) #with open(self.filepath+filename) as csvfile: #readCSV = csv.reader(csvfile, delimiter=',') csvfile = self.file_service.get_file_to_text(self.data, None, filename) csvString = csvfile.content csvString = csvString.splitlines() readCSV = csv.reader(csvString, delimiter=',', dialect=csv.excel_tab) # Get the row count so we know where to trim for the genes rowCount = sum(1 for row in csv.reader(csvString, delimiter=',', dialect=csv.excel_tab)) #csvfile.seek(0) self.log.logMessage('Row count is: '+str(rowCount)) # Patient data from the file patientDetails = { 'patientID': '', 'baseline': '', 'runNumber': '', 'sampleType': '', 'reportIssued': '', 'ngsRun': '', 'ngsSampleType': '', 'pipelineVersion': '', 'ngsLibraryCFDNAInput': '', 'averageReadDepth': '', 'colourGreenCFDNA': '', 'colourYellowCFDNA': '', 'colourRedCFDNA': '', 'colourGreenReadDepth': '', 'colourYellowReadDepth': '', 'colourRedReadDepth': '', 'detectionLevel': '', 'ngsComment': '', 'exploratoryComment': '', 'geneData': {} } # Loop over the rows and update the patientDetails baselineNumberKey = { 'Baseline1': 1, 'B': 1, 'Baseline2': 2, 'B2': 2, 'DT1': 3, 'DT2': 4, 'DT3': 5, 'DT4': 6, 'DT5': 7, 'DT6': 8, 'DT7': 9, 'DT8': 10, 'DT9': 11, 'DT10': 12, 'DT11': 13, 'DT12': 14, 'DT13': 15, 'DP1': 16, 'DP2': 17, 'DP3': 18, 'EoT': 19, 'EoT2':20, 'EoT3':21 } geneRowsStart = rowCount-19 currentRow = 1 currentGeneRow = 1 for row in readCSV: # Get the patient ID if 'Patient_ID' in row[0]: patientDetails['patientID'] = row[1] # Get the baseline number if 'Visit' in row[0]: try: if row[1].startswith('NT'): row[1]=row[1][3:] if (not 'baseline' in patientDetails or len(str(patientDetails['baseline'])) ==0): patientDetails['baseline'] = baselineNumberKey[row[1]] print('Baseline number is: '+str(patientDetails['baseline'])) except Exception as e: self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Baseline missing or incorrect') print(str(e)) return 0 # Get the run number if 'Run' == row[0]: patientDetails['runNumber'] = row[1] # Get the sample type if 'Sample Type' in row[0]: patientDetails['sampleType'] = row[1] # Get the date report issued if 'Date Report Issued' in row[0]: patientDetails['reportIssued'] = row[1] # Get the NGS run if 'NGS Run' in row[0]: patientDetails['ngsRun'] = row[1] # Get the NGS sample type if 'NGS Sample Type' in row[0]: patientDetails['ngsSampleType'] = row[1] # Get the Bioinformatics Pipeline if 'Bioinformatics Pipeline' in row[0]: patientDetails['pipelineVersion'] = row[1] # Get the NGS Library cfDNA input if 'NGS Library cfDNA Input' in row[0]: patientDetails['ngsLibraryCFDNAInput'] = row[1] # Get the Average Read Depth if 'Average Read Depth' in row[0]: patientDetails['averageReadDepth'] = row[1] # Get the Colour Green cfDNA if 'Colour Green cfDNA' in row[0]: patientDetails['colourGreenCFDNA'] = row[1] # Get the Colour Yellow cfDNA if 'Colour Yellow cfDNA' in row[0]: patientDetails['colourYellowCFDNA'] = row[1] # Get the Colour Red cfDNA if 'Colour Red cfDNA' in row[0]: patientDetails['colourRedCFDNA'] = row[1] # Get the Colour Green Read Depth if 'Colour Green Read Depth' in row[0]: patientDetails['colourGreenReadDepth'] = row[1] # Get the Colour Yellow Read Depth if 'Colour Yellow Read Depth' in row[0]: patientDetails['colourYellowReadDepth'] = row[1] # Get the Colour Red Read Depth if 'Colour Red Read Depth' in row[0]: patientDetails['colourRedReadDepth'] = row[1] # Get the Detection Level if 'Detection Level' in row[0]: patientDetails['detectionLevel'] = row[1] # Get the NGS comment if 'Comments for NGS Subset' in row[0]: patientDetails['ngsComment'] = row[1].replace("'","''") # Get the Exploratory comment if 'Comment for Exploratory Subset' in row[0]: patientDetails['exploratoryComment'] = row[1].replace("'","''") # Get the gene rows as lists if currentRow > 20: # Add the gene data patientDetails['geneData'][currentGeneRow] = row # Increment the tracking value currentGeneRow = currentGeneRow+1 # Increment the tracking value currentRow = currentRow+1 #for i in patientDetails: #print(str(patientDetails[i])) #self.log.logMessage(i +' '+ patientDetails[i]) #print('--------->'+str(patientDetails['runNumber'])) #print(len(patientDetails['runNumber'])) if len(patientDetails['runNumber']) == 0: self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Run number missing') return 0 else: #check if re-submit and delete old data if patient was not discussed since first submission resubmission,prev_gene_panel_id=self.checkResubmission(patientDetails) print('Resubmission: ' + str(resubmission)) #if resubmission == 2: # self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Resubmission after patient being discussed') # self.log.logMessage(filename + ' Error: Resubmission after patient being discussed') # return 0 if resubmission == 1: self.deleteOldBloodReport(patientDetails) self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), "Resubmit of file -- delete old content") self.log.logMessage(filename + ' Resubmit of file -- delete old content') self.updateDatabaseBloodReport(patientDetails, filename, (resubmission==2), prev_gene_panel_id) # checkResubmission returns 0 if no resubmission; 1 if resubmission can overwrite; 2 if resubmission requires new version def checkResubmission(self, patientDetails): checkBaselineRunSQL = "select measurement_gene_panel_id,ingestion_date from MEASUREMENT_GENE_PANEL as mgp LEFT JOIN SPECIMEN s on mgp.specimen_id=s.specimen_id LEFT JOIN PERSON p on s.person_id=p.person_id LEFT JOIN CONCEPT_DATA_SOURCES gp on gp.data_source_concept_id = mgp.data_source_concept_id where p.target_id='"+patientDetails['patientID']+"' and mgp.baseline_number="+str(patientDetails['baseline'])+" and mgp.run_number="+patientDetails['runNumber']+" and mgp.ngs_run!='GDL' and gp.panel_name!='foundationmedicine' order by measurement_gene_panel_id desc;" cursorRun = self.conn.cursor() cursorRun.execute(checkBaselineRunSQL) row = cursorRun.fetchone() cursorRun.close() if row is None: return 0,0 measurement_gene_panel_id=row[0]; ingestion_date=None if row[1] is not None: ingestion_date= row[1]#datetime.datetime.strptime(row[1],'%Y-%m-%d H:%M:%S.%fZ') old_date = "select date_issued from REPORT where measurement_gene_panel_id = " +str(measurement_gene_panel_id)+";" cursorReport = self.conn.cursor() cursorReport.execute(old_date) row = cursorReport.fetchone() if row is not None: bloodReportDate = row[0]#datetime.datetime.strptime(row[0],'%Y-%m-%d') lastReportSQL = "SELECT created_on FROM MEETING_OUTCOME as mo LEFT JOIN PERSON as p on p.person_id=mo.person_id where p.target_id='"+patientDetails['patientID']+"' ORDER BY created_on DESC;" cursor = self.conn.cursor() cursor.execute(lastReportSQL) row = cursor.fetchone(); cursor.close() if row is not None: lastDiscussedDate = row[0] if ingestion_date is not None: if lastDiscussedDate>=ingestion_date: return 2,measurement_gene_panel_id else: return 1,measurement_gene_panel_id if bloodReportDate is None: bloodReportDate = datetime.datetime.strptime(patientDetails['reportIssued'], '%d/%m/%Y') if lastDiscussedDate>=bloodReportDate: return 2,measurement_gene_panel_id return 1,measurement_gene_panel_id # checkResubmission returns 0 if no resubmission; 1 if resubmission can overwrite; 2 if resubmission requires new version def checkResubmissionTumour(self, specimen_id, baseline, run): checkBaselineRunSQL = "select measurement_gene_panel_id, s.person_id, ingestion_date from MEASUREMENT_GENE_PANEL as mgp LEFT JOIN SPECIMEN s on mgp.specimen_id=s.specimen_id LEFT JOIN PERSON p on s.person_id=p.person_id where mgp.specimen_id="+str(specimen_id)+" and mgp.baseline_number="+str(baseline)+" and mgp.run_number="+str(run)+" and ngs_run='GDL' order by measurement_gene_panel_id desc;" cursorRun = self.conn.cursor() cursorRun.execute(checkBaselineRunSQL) row = cursorRun.fetchone() cursorRun.close() if row is None: return 0,0 measurement_gene_panel_id=row[0]; ingestion_date=None person_id=None if row[2] is not None: ingestion_date=row[2] if row[1] is not None: person_id=row[1] lastReportSQL = "SELECT created_on FROM MEETING_OUTCOME as mo where mo.person_id="+str(person_id)+" ORDER BY created_on DESC;" cursor = self.conn.cursor() cursor.execute(lastReportSQL) row = cursor.fetchone(); cursor.close() if row is not None: lastDiscussedDate = row[0] if ingestion_date is not None: if lastDiscussedDate>=ingestion_date: return 2,measurement_gene_panel_id else: return 1,measurement_gene_panel_id return 1,measurement_gene_panel_id def deleteOldBloodReport(self, patientDetails): checkBaselineRunSQL = "select measurement_gene_panel_id from MEASUREMENT_GENE_PANEL as mgp LEFT JOIN SPECIMEN s on mgp.specimen_id=s.specimen_id LEFT JOIN PERSON p on s.person_id=p.person_id LEFT JOIN CONCEPT_DATA_SOURCES gp on gp.data_source_concept_id = mgp.data_source_concept_id where p.target_id='"+patientDetails['patientID']+"' and mgp.baseline_number="+str(patientDetails['baseline'])+" and mgp.run_number="+patientDetails['runNumber']+" and mgp.ngs_run!='GDL' and gp.panel_name!='foundationmedicine' ORDER BY measurement_gene_panel_id DESC;" cursorRun = self.conn.cursor() cursorRun.execute(checkBaselineRunSQL) row= cursorRun.fetchone() cursorRun.close() if row is not None: measurement_gene_panel_id=row[0]; selectGeneVariantSQL = "select measurement_gene_variant_id from MEASUREMENT_GENE_VARIANT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorSelectGeneVariant = self.conn.cursor() cursorSelectGeneVariant.execute(selectGeneVariantSQL); gene_variant_ids=cursorSelectGeneVariant.fetchall(); for row2 in gene_variant_ids: gene_variant_id=row2[0] deletedSelectedGeneVariant = "DELETE FROM SELECTED_GENE_VARIANT WHERE measurement_gene_variant_id="+str(gene_variant_id)+";" cursorDeletedSelectedGeneVariant = self.conn.cursor() cursorDeletedSelectedGeneVariant.execute(deletedSelectedGeneVariant) # remove data from measurement_gene_variant for this measurement_gene_panel_id deleteGeneVariantSQL = "DELETE FROM MEASUREMENT_GENE_VARIANT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteVariant = self.conn.cursor() cursorDeleteVariant.execute(deleteGeneVariantSQL) deleteReportSQL = "DELETE FROM REPORT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteReport = self.conn.cursor() cursorDeleteReport.execute(deleteReportSQL) deleteGenePanelSQL = "DELETE FROM MEASUREMENT_GENE_PANEL where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteGenePanel = self.conn.cursor() cursorDeleteGenePanel.execute(deleteGenePanelSQL) def deleteOldTumourReport(self, measurement_gene_panel_id): selectGeneVariantSQL = "select measurement_gene_variant_id from MEASUREMENT_GENE_VARIANT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorSelectGeneVariant = self.conn.cursor() cursorSelectGeneVariant.execute(selectGeneVariantSQL); gene_variant_ids=cursorSelectGeneVariant.fetchall(); for row2 in gene_variant_ids: gene_variant_id=row2[0] deletedSelectedGeneVariant = "DELETE FROM SELECTED_GENE_VARIANT WHERE measurement_gene_variant_id="+str(gene_variant_id)+";" cursorDeletedSelectedGeneVariant = self.conn.cursor() cursorDeletedSelectedGeneVariant.execute(deletedSelectedGeneVariant) # remove data from measurement_gene_variant for this measurement_gene_panel_id deleteGeneVariantSQL = "DELETE FROM MEASUREMENT_GENE_VARIANT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteVariant = self.conn.cursor() cursorDeleteVariant.execute(deleteGeneVariantSQL) deleteReportSQL = "DELETE FROM REPORT where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteReport = self.conn.cursor() cursorDeleteReport.execute(deleteReportSQL) deletePathLabRefSQL = "DELETE FROM PATH_LAB_REF where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeletePathLabRef = self.conn.cursor() cursorDeletePathLabRef.execute(deletePathLabRefSQL) deleteGenePanelSQL = "DELETE FROM MEASUREMENT_GENE_PANEL where measurement_gene_panel_id="+str(measurement_gene_panel_id)+";" cursorDeleteGenePanel = self.conn.cursor() cursorDeleteGenePanel.execute(deleteGenePanelSQL) def updateDatabaseBloodReport(self, patientDetails, filename, updateSelection, prev_gene_panel_id): # Check if patient exists checkSQL = "SELECT * FROM dbo.PERSON WHERE target_id='"+patientDetails['patientID']+"'" cursor = self.conn.cursor() cursor.execute(checkSQL) foundPerson = 'No' personID = 0 for row in cursor: foundPerson = 'Yes' personID = row[0] self.log.logMessage('Is patient '+patientDetails['patientID']+' in the database? '+foundPerson) if foundPerson == 'Yes': findGeneConceptID = "SELECT data_source_concept_id from CONCEPT_DATA_SOURCES WHERE panel_name='"+patientDetails['ngsSampleType']+"'" cursor = self.conn.cursor() cursor.execute(findGeneConceptID) row=cursor.fetchone() geneConceptID=0 if row is not None: geneConceptID=row[0] else: insertGeneConcept = "INSERT INTO CONCEPT_DATA_SOURCES (panel_name) VALUES('"+patientDetails['ngsSampleType']+"')" cursor = self.conn.cursor() cursor.execute(insertGeneConcept) geneConceptID=cursor.lastrowid # Get the lastest blood specimen to attach the report to #specimenSQL = "SELECT TOP 1 * FROM SPECIMEN WHERE person_id = "+str(personID)+" AND specimen_concept_id=1 ORDER BY specimen_date DESC" specimenSQL = "SELECT * FROM SPECIMEN WHERE person_id = "+str(personID)+" AND specimen_concept_id=1 AND baseline_number = "+str(patientDetails['baseline'])+" ORDER BY specimen_date DESC" cursor = self.conn.cursor() cursor.execute(specimenSQL) row = cursor.fetchone() #print('row count for '+patientDetails['patientID']+' is: '+str(row.rowcount)) # Check a result has been returned (if not there is no blood sample in the database for this person) if row is not None: specimenID = 0 specimenID = row[0] self.log.logMessage('Adding report for person ID '+str(personID)+' ('+patientDetails['patientID']+') against specimen_id '+str(specimenID)) # Update the existing patient data from the file data cfdnaColourKey = patientDetails['colourGreenCFDNA']+';'+patientDetails['colourYellowCFDNA']+';'+patientDetails['colourRedCFDNA'] averageColourKey = patientDetails['colourGreenReadDepth']+';'+patientDetails['colourYellowReadDepth']+';'+patientDetails['colourRedReadDepth'] # Record whether analysis failed analysisFailed = 'false' if patientDetails['geneData'][1][0] == 'Failed': # Analysis failed analysisFailed = 'true' # Record if there were no mutations found noMutations = 'false' if patientDetails['geneData'][1][0] == 'None': # No mutations found noMutations = 'true' # Update MEASUREMENT_GENE_PANEL table update1Cursor = self.conn.cursor() update1SQL = "INSERT INTO MEASUREMENT_GENE_PANEL (specimen_id, data_source_concept_id, average_read_depth, bioinformatics_pipeline, ngs_library_cfdna_input, ngs_run, level_of_detection, cfdna_input_colour_key, average_read_depth_colour_key, analysis_failed, no_mutations_found, ngs_comment, exploratory_comment, run_number, baseline_number) VALUES('"+str(specimenID)+"', '"+str(geneConceptID)+"', '"+patientDetails['averageReadDepth']+"', '"+patientDetails['pipelineVersion']+"', '"+patientDetails['ngsLibraryCFDNAInput']+"', '"+patientDetails['ngsRun']+"', '"+patientDetails['detectionLevel']+"', '"+cfdnaColourKey+"', '"+averageColourKey+"', '"+analysisFailed+"', '"+noMutations+"', '"+patientDetails['ngsComment']+"', '"+patientDetails['exploratoryComment']+"', '"+patientDetails['runNumber']+"', "+str(patientDetails['baseline'])+")" update1SQLID = 0 try: update1Cursor.execute(update1SQL) #self.log.logMessage('Updated MEASUREMENT_GENE_PANEL with '+str(update1Cursor.rowcount)+' row(s)') update1SQLID = update1Cursor.lastrowid self.log.logMessage('Created MEASUREMENT_GENE_PANEL record ID: '+str(update1SQLID)) except Exception as e: self.log.logMessage('Database MEASUREMENT_GENE_PANEL update failed, exiting...') self.log.logMessage('Error message: ') self.log.logMessage(str(e)) # Update MEASUREMENT_GENE_VARIANT table if patientDetails['geneData'][1][0] != 'Failed' and patientDetails['geneData'][1][0] != 'None': for i in patientDetails['geneData']: if "sep-" in patientDetails['geneData'][i][0].lower(): patientDetails['geneData'][i][0]=patientDetails['geneData'][i][0].lower().replace('sep-0','SEPT'); patientDetails['geneData'][i][0]=patientDetails['geneData'][i][0].replace('sep-','SEPT'); # Mutations found, add the gene data for the patient selectGeneCursor = self.conn.cursor() selectGeneCursor.execute("SELECT * FROM CONCEPT_GENE WHERE gene_name = '"+patientDetails['geneData'][i][0]+"'") gene_concept_id = '' # If the gene exists in the table, get the ID, else add it to the table and use the ID selectGeneCursorRowcount = 0 gene_concept_id = 0 for row in selectGeneCursor: selectGeneCursorRowcount = selectGeneCursorRowcount+1 gene_concept_id = row[0] if selectGeneCursorRowcount == 0: selectGeneCursor = self.conn.cursor() selectGeneCursor.execute("INSERT INTO CONCEPT_GENE (gene_name) VALUES('"+patientDetails['geneData'][i][0]+"')") gene_concept_id = selectGeneCursor.lastrowid update2Cursor = self.conn.cursor() update2SQL = "INSERT INTO MEASUREMENT_GENE_VARIANT (measurement_gene_panel_id, gene_concept_id, cosmic_url, variant_allele_frequency, read_depth, germline_frequency, functional_effect, cdna_change, amino_acid_change, chromosome, high_confidence, ngs_gene_present, is_specific_mutation_in_panel, position) VALUES("+str(update1SQLID)+", '"+str(gene_concept_id)+"', '"+patientDetails['geneData'][i][11].replace("'",'"')+"', '"+patientDetails['geneData'][i][4]+"', '"+patientDetails['geneData'][i][5]+"', '"+patientDetails['geneData'][i][6]+"', '"+patientDetails['geneData'][i][7]+"', '"+patientDetails['geneData'][i][2]+"', '"+patientDetails['geneData'][i][3]+"', '"+patientDetails['geneData'][i][1]+"', '"+patientDetails['geneData'][i][8]+"', '"+patientDetails['geneData'][i][9]+"', '"+patientDetails['geneData'][i][10]+"', '"+patientDetails['geneData'][i][12]+"')" #print(patientDetails['geneData'][i][12]) try: update2Cursor.execute(update2SQL) #self.log.logMessage('Updated MEASUREMENT_GENE_VARIANT with '+str(update2Cursor.rowcount)+' row(s)') measurement_gene_variant_id = update2Cursor.lastrowid self.log.logMessage('Created MEASUREMENT_GENE_VARIANT record ID: '+str(measurement_gene_variant_id)) except Exception as e: self.log.logMessage('Database MEASUREMENT_GENE_VARIANT update failed, exiting...') self.log.logMessage('Error message: ') self.log.logMessage(str(e)) # new version of old blood report keep selections if updateSelection == True: findPreviouseSelectionQuery="select person_id from SELECTED_GENE_VARIANT FULL OUTER JOIN MEASUREMENT_GENE_VARIANT on MEASUREMENT_GENE_VARIANT.measurement_gene_variant_id=SELECTED_GENE_VARIANT.measurement_gene_variant_id FULL OUTER JOIN MEASUREMENT_GENE_PANEL on MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=MEASUREMENT_GENE_VARIANT.measurement_gene_panel_id where person_id="+str(personID)+" and type='CTDNA' and baseline_number="+str(patientDetails['baseline'])+" and run_number="+str(patientDetails['runNumber'])+" and MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=" +str(prev_gene_panel_id)+ " and position='"+patientDetails['geneData'][i][12] +"';" findPreviousSelectionCursor = self.conn.cursor() findPreviousSelectionCursor.execute(findPreviouseSelectionQuery) row=findPreviousSelectionCursor.fetchone() findPreviousSelectionCursor.close() if row is not None: #was previously selected insertSelectionQuery="insert into SELECTED_GENE_VARIANT (person_id, measurement_gene_variant_id, type) values("+str(personID)+","+str(measurement_gene_variant_id)+",'CTDNA');" insertSelectionCursor = self.conn.cursor() insertSelectionCursor.execute(insertSelectionQuery) insertSelectionCursor.close() # Insert report data reportIssued = self.formatDate(patientDetails['reportIssued']) update3Cursor = self.conn.cursor() update3SQL = "INSERT INTO REPORT (date_issued, measurement_gene_panel_id) VALUES('"+reportIssued+"','"+str(update1SQLID)+"')" try: update3Cursor.execute(update3SQL) #self.log.logMessage('Updated REPORT with '+str(update3Cursor.rowcount)+' row(s)') update3SQLID = update3Cursor.lastrowid self.log.logMessage('Created REPORT record ID: '+str(update3SQLID)) except Exception as e: self.log.logMessage('Database REPORT update failed.') self.log.logMessage('Error message: ') self.log.logMessage(str(e)) # Delete the file once processed if update1Cursor.rowcount > 0 and update3Cursor.rowcount > 0: # All should be OK, we can commit changes. self.conn.commit() # Both updates were successful... # Delete the file self.log.logMessage('Deleting '+filename) try: #os.remove(self.filepath+filename) self.file_service.delete_file(self.data, None, filename) self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Success') self.conn.commit() except: self.log.logMessage('There was a problem deleting '+filename) else: self.log.logMessage('Error: The cfDNA blood report did not update properly in the database for '+patientDetails['patientID']) self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Data import failed') self.conn.rollback() else: self.log.logMessage(' ######## ERROR: There is no blood sample for patient '+patientDetails['patientID']+' ########') self.log.logMessage(' ######## Skipping import for this patient '+patientDetails['patientID']+' ########') self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: No specimen data') self.conn.rollback() else: self.log.logMessage(" ######## ERROR: Patient "+patientDetails['patientID']+" doesn't exist. ########") self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Patient is not present in the database') self.conn.rollback() def processIHCPictures(self, filename): self.log.logMessage('Sending IHC image '+filename+' to the web app reports folder.') pict = self.file_service.get_file_to_bytes(self.data, None, filename) upload_name=filename[0:-4]+filename[-4:].lower() self.pdf_blob_service.create_container(self.container_name) version=0 prev_versions=self.pdf_blob_service.list_blobs(self.container_name, filename[0:-4]) props=None #find the next version number for v in prev_versions: v_version=v.name[len(filename[0:-4]):-4] if v_version is not None and len(v_version)>0: d=re.search("\d", v_version) if d is not None and version<=int(d.group(0)): version=int(d.group(0)) props=v.properties else: props=v.properties print(props) if props is not None: lastModified=props.last_modified.replace(tzinfo=datetime.timezone.utc) lastMeetingSQL="SELECT created_on FROM MEETING_OUTCOME as mo LEFT JOIN SPECIMEN on mo.person_id=SPECIMEN.person_id where SPECIMEN.preclin_id='"+filename[0:-8]+"' order by created_on desc" cursorRun = self.conn.cursor() cursorRun.execute(lastMeetingSQL) row = cursorRun.fetchone() cursorRun.close() if row is not None: lastMeetingDate=row[0].replace(tzinfo=datetime.timezone.utc) if lastMeetingDate>lastModified: #amend filename with version number upload_name=filename[:-4]+'_v'+str(version+1)+filename[-4:] self.log.logMessage('New version ' + upload_name) #delete previous blob else: if version!=0: upload_name=filename[:-4]+'_v'+str(version)+filename[-4:] self.pdf_blob_service.delete_blob(self.container_name, upload_name) print(upload_name) self.pdf_blob_service.create_blob_from_bytes(self.container_name,upload_name, pict.content) self.file_service.delete_file(self.data, None, filename) self.log.logMessage('Success ' + filename) self.log.systemStatusUpdate(filename, 'IHC', self.log.timestamp(), "Success") # Move the pdf file to the web app def processPDFReport(self, filename): self.log.logMessage('Sending '+filename+' to the web app reports folder.') f=filename filename=filename[0:-4]+filename[-4:].lower() #move to azure storage self.pdf_blob_service.create_container(self.container_name) self.pdf_blob_service.create_blob_from_path(self.config['containername'],filename,self.filepath+'/tmp/'+f); # Delete the copies self.log.logMessage('Deleting PDF report file '+filename) # This throws an error, even though it works correctly. try: if self.file_service.exists(self.data, None, filename): self.file_service.delete_file(self.data, None, filename) if os.path.exists(self.filepath+'/tmp/'+f): os.remove(self.filepath+'/tmp/'+f) if re.search("FM.PDF|FMV[1-9].PDF", filename.upper()): if self.checkFMPDF(filename): self.log.systemStatusUpdate(filename, 'FM', self.timestamp(), 'Success') elif re.search("QCI.PDF|QCIV[1-9].PDF", filename.upper()): if self.checkQCIPDF(filename): self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Success') # elif re.search("[A-Z]{3}\d{7}T1SM(.)*\.PDF", filename.upper()): elif re.search("^[A-Z]{3}\d{7}(.)*SM(.)*\.PDF", filename.upper()): print('match rna') if self.checkRNAPDF(filename): self.log.systemStatusUpdate(filename, 'RNA', self.timestamp(), 'Success') elif self.checkGDLPDF(filename): self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Success') elif self.checkOtherSourcePDF(filename): self.log.systemStatusUpdate(filename, 'PDF', self.timestamp(), 'Success') except Exception as inst: self.log.logMessage(str(type(inst))) self.log.logMessage(str(inst)) self.log.logMessage('There was a problem deleting '+filename) if filename.upper().endswith('FM.PDF'): self.log.systemStatusUpdate(filename, 'FM', self.timestamp(), 'Error: Moving the PDF reports failed.') elif filename.upper().endswith('QCI.PDF'): self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Error: Moving the PDF reports failed.') else: self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Error: Moving the PDF reports failed.') def checkFMPDF(self, filename): try: print(filename) #find numeric patient id patientid = int(re.search(r'\d+', filename).group()) try: patternBlood = re.compile("^[A-Z]{3}\d{7}T\d{1,2}FM") patternTumour = re.compile("^[A-Z]{3}\d{7}Bx\d{1,2}FM") if not re.match(patternBlood, filename) and not re.match(patternTumour,filename): self.log.systemStatusUpdate(filename, 'FM', self.timestamp(), 'Warning: the file has been uploaded but filename does not conform to naming convention') return False timepointInt = re.findall(r'\d+',filename[10:]) print(timepointInt) if re.match(patternBlood, filename): SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id=1 and baseline_number = "+str(timepointInt[0])+" and target_id='"+filename[:10]+"'" else: SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id!=1 and baseline_number = "+str(timepointInt[0])+" and target_id='"+filename[:10]+"'" except (ValueError, KeyError, AssertionError) as e: #no blood structure try tumour print(filename[:filename.upper().index('FM')]) assert len(filename[:filename.upper().index('FM')]) >0, "Sample missing in filename" SQL = "SELECT specimen_id FROM SPECIMEN WHERE preclin_id = '"+str(filename[:filename.upper().index('FM')])+"'" print(SQL) cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() print(row) if row is not None: return True self.log.systemStatusUpdate(filename, 'FM', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to sample/timepoint') return False except Exception as e: self.log.logMessage(str(type(e))) self.log.logMessage(str(e)) print(e) self.log.systemStatusUpdate(filename, 'FM', self.timestamp(), 'Warning: ' + str(e)) return False def checkRNAPDF(self, filename): try: rna_re="^[A-Z]{3}\d{7}T1SM" if not re.match(rna_re, filename): self.log.systemStatusUpdate(filename, 'RNA', self.timestamp(), 'Warning: the file has been uploaded but unable to match to a timepoint as it can only be T1 at present') return False siteId=filename[3:6] getSite = self.conn.cursor() getSite.execute("SELECT * FROM CARE_SITE WHERE care_site_id = "+siteId) row=getSite.fetchone() if row is None: self.log.systemStatusUpdate(filename, 'RNA', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to a site') return False SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id=1 and baseline_number = 1 and target_id='"+filename[:10]+"'" cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() print(row) cursor.close() if row is None: self.log.systemStatusUpdate(filename, 'RNA', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to patient') return False else: return True except Exception as e: self.log.logMessage(str(type(e))) self.log.logMessage(str(e)) self.log.systemStatusUpdate(filename, 'RNA', self.timestamp(), 'Warning: the file has been uploaded but exception occurred '+ str(e)) print(e) return False def checkOtherSourcePDF(self, filename): print('start checkOtherSourcePDF '+ filename) try: print(filename) patternBlood = re.compile("^[A-Z]{3}\d{7}T\d{1,2}[a-zA-Z]{2}") patternTumour = re.compile("^[A-Z]{3}\d{7}Bx\d{1,2}[a-zA-Z]{2}") assert re.match(patternBlood, filename) or re.match(patternTumour, filename), "Filename has wrong structure" siteId=filename[3:6] getSite = self.conn.cursor() getSite.execute("SELECT * FROM CARE_SITE WHERE care_site_id = "+siteId) row=getSite.fetchone() if row is None: self.log.systemStatusUpdate(filename, 'PDF', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to a site') return False timepointInt = re.findall(r'\d+',filename[10:]) print(timepointInt) if re.match(patternBlood, filename): SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id=1 and baseline_number = "+str(timepointInt[0])+" and target_id='"+filename[:10]+"'" else: SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id!=1 and baseline_number = "+str(timepointInt[0])+" and target_id='"+filename[:10]+"'" print(SQL) cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() print(row) if row is None: self.log.systemStatusUpdate(filename, 'PDF', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to sample') return False #check two letter code shortCode=re.findall(r'[a-zA-Z]{2}', filename[12:]) assert len(shortCode)>0, "No two char source code found" SQL = "select short_code from CONCEPT_DATA_SOURCES where short_code='"+str(shortCode[0])+"'" print(SQL) cursor.execute(SQL) row = cursor.fetchone() print(row) if row is not None: return True self.log.systemStatusUpdate(filename, 'PDF', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to source') return False except Exception as e: self.log.logMessage(str(type(e))) self.log.logMessage(str(e)) self.log.systemStatusUpdate(filename, 'PDF', self.timestamp(), 'Warning: the file has been uploaded but filename does not conform to naming convention') print(e) return False def checkQCIPDF(self, filename): try: print('filename ', filename) siteId=filename[3:6] getSite = self.conn.cursor() getSite.execute("SELECT * FROM CARE_SITE WHERE care_site_id = "+siteId) row=getSite.fetchone() if row is None: self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to a site') return False SQL = "SELECT person_id FROM PERSON WHERE target_id='"+filename[:10]+"'" cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() print(row) cursor.close() if row is None: self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Warning: the file has been uploaded but unable to match file to patient') return False pattern = re.compile("^[A-Z]{3}\d{7}T\d{1,2}QCI.PDF") assert re.match(pattern, filename.upper()), "Filename has wrong structure" timepoint = filename[11:filename.upper().index("QCI")] #throghs exception ValueError if not found print('Timepoint ', timepoint) assert timepoint is not None, "Timepoint missing in name" SQL = "SELECT specimen_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id WHERE specimen_concept_id=1 and baseline_number = "+timepoint+" and target_id='"+filename[:10]+"'" print(SQL) cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() print(row) if row is not None: return True self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Warning: the file has been uploaded but sample was not found') return False except Exception as e: self.log.logMessage(str(type(e))) self.log.logMessage(str(e)) print(e) self.log.systemStatusUpdate(filename, 'QCI', self.timestamp(), 'Warning: the file has been uploaded but filename does not conform to naming convention') return False def checkGDLPDF(self, filename): SQL = "SELECT report_id FROM REPORT WHERE filename = '"+str(filename)+"'" cursor = self.conn.cursor() cursor.execute(SQL) row = cursor.fetchone() if row is not None: return True return False def processGDLData(self, filename, xmlString): # Parse the xml string into a usable format try: # Get the patient data root = ET.fromstring(xmlString) assert root.find('.//identifier[@type="Your ref"]') is not None, 'identifier "Your ref" missing' christieNo = root.find('.//identifier[@type="Your ref"]').text assert root.find('.//identifier[@type="Our ref"]') is not None, 'identifier "Our ref" missing' reportRef = root.find('.//identifier[@type="Our ref"]').text assert root.find('.//submitted') is not None, 'report submitted date missing' reportDate = root.find('.//submitted').text assert root.find('.//activated') is not None, 'report activated date missing' gdlDate = root.find('.//activated').text reportDateFormatted = datetime.datetime.strptime(reportDate, '%d-%b-%Y').strftime('%Y-%m-%d') assert root.find('.//identifier') is not None, "Path lab can't be missing or empty" pathLabRef = root.find('.//identifier[@type="Path lab"]').text coverage = 0 if root.find('.//coverage') is not None: coverage = root.find('.//coverage').text coverage = coverage.replace("%", "") comments = 'n/a' if root.find('.//comments') is not None: comments = root.find('.//comments').text comment = 'n/a' if root.find('.//comment') is not None: comment = root.find('.//comment').text # Update the database, measurement_gene_panel table cursor = self.conn.cursor() # Get the specimen ID for this patient hash_object = hashlib.sha256(christieNo.encode('utf-8')) hex_dig = hash_object.hexdigest() print("hex_dig " + str(hex_dig)); #gdlSQL = "SELECT specimen.specimen_id FROM SPECIMEN AS specimen, PERSON AS person WHERE person.hospital_number_hash = '"+hex_dig+"'" gdlSQL = """SELECT TOP 1 specimen.specimen_id FROM SPECIMEN AS specimen left join PERSON AS person on specimen.person_id= person.person_id WHERE specimen.person_id = (SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s) AND (specimen.specimen_concept_id = 2 OR specimen.specimen_concept_id = 3) AND dbo.RemoveNonAlphaNumChars(tumour_id) like dbo.RemoveNonAlphaNumChars(%s) order by specimen.specimen_id asc""" cursor.execute(gdlSQL, (hex_dig, pathLabRef)) #print(gdlSQL) specimenID = 0 for row in cursor: specimenID = row[0] print('SPECIMEN ID IS: '+str(specimenID)) if specimenID == 0: # No specimen found self.log.logMessage('No specimen ID found for '+filename) raise Exception('No specimen ID found.') else: #check resubmission tumour resubmission,prev_gene_panel_id=self.checkResubmissionTumour(specimenID, 1, 1) print("resubmission: " + str(resubmission) + "prev_gene_panel: " + str(prev_gene_panel_id)) updateSelection=False if resubmission==1: self.deleteOldTumourReport(prev_gene_panel_id); self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), "Resubmit of file -- delete old content") self.log.logMessage(filename + ' Resubmit of file -- delete old content') if resubmission==2: updateSelection=True #find the latest GDL panel id cursor = self.conn.cursor() genePanelSQL= "select top 1 data_source_concept_id from CONCEPT_DATA_SOURCES where valid_start_date<%s order by valid_start_date desc" cursor.execute(genePanelSQL, (reportDateFormatted)) row=cursor.fetchone() if row is not None: data_source_concept_id=row[0] else: data_source_concept_id=2 # Update MEASUREMENT_GENE_PANEL cursor = self.conn.cursor() gdlSQL = """INSERT INTO MEASUREMENT_GENE_PANEL (specimen_id, data_source_concept_id, coverage, average_read_depth, ngs_run, analysis_failed, no_mutations_found, run_number, baseline_number, comments, unknown_significance) VALUES(%s, %s, %s, 'n/a', 'GDL', 'false', 'false', 1, 1, %s, %s)""" #print(gdlSQL) cursor.execute(gdlSQL, (specimenID, data_source_concept_id, coverage, comments, comment)) measurement_gene_panel_id = cursor.lastrowid #print(gdlSQL) # Update REPORT cursor = self.conn.cursor() reportFilename = reportRef+'.pdf' reportSQL = "INSERT INTO REPORT (date_issued, measurement_gene_panel_id, filename) VALUES(%s, %d, %s)" cursor.execute(reportSQL, (reportDateFormatted, measurement_gene_panel_id, reportFilename)) #print(reportSQL) # Update PATH_LAB_REF cursor = self.conn.cursor() pathSQL = "INSERT INTO PATH_LAB_REF (path_lab_ref, specimen_id, measurement_gene_panel_id) VALUES(%s, %d, %d)" #print(pathSQL) cursor.execute(pathSQL, (pathLabRef, specimenID, measurement_gene_panel_id)) # Update GDL_REQUEST cursor = self.conn.cursor() gdlDateFormatted = datetime.datetime.strptime(gdlDate, '%d-%b-%Y').strftime('%Y-%m-%d') #print(gdlDate) #print(gdlDateFormatted) #reportSQL = "INSERT INTO GDL_REQUEST (date_requested, path_lab_ref, specimen_id, sample_type) VALUES('"+gdlDateFormatted+"', '"+pathLabRef+"', '"+str(specimenID)+"', '')" #reportSQL = "UPDATE GDL_REQUEST SET date_requested='"+gdlDateFormatted+"', path_lab_ref='"+pathLabRef+"' WHERE specimen_id='"+str(specimenID)+"'" #print(reportSQL) #cursor.execute(reportSQL) # Get the gene results data and update MEASUREMENT_GENE_VARIANT geneCount = 0 try: insertSGVarguments=[] for gene in root.iter(tag = 'gene'): geneName = gene.attrib['name'] for variant in gene[0]: cdna = variant[0].text if variant.find('protein') is not None: amino = variant.find('protein').text else: amino = "" if variant.find('reads') is not None: reads = variant.find('reads').text reads = reads.replace("%", "") else: reads = "" cursor = self.conn.cursor() gdlSQL = "INSERT INTO MEASUREMENT_GENE_VARIANT (measurement_gene_panel_id, gene_concept_id, cdna_change, amino_acid_change, variant_allele_frequency) VALUES(%d, (SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = %s), %s, %s, %s)" cursor.execute(gdlSQL,(measurement_gene_panel_id, geneName, cdna, amino, reads)) measurement_gene_variant_id = cursor.lastrowid # new version of old tumour report keep selections if updateSelection == True: findPreviouseSelectionQuery="select person_id from SELECTED_GENE_VARIANT FULL OUTER JOIN MEASUREMENT_GENE_VARIANT on MEASUREMENT_GENE_VARIANT.measurement_gene_variant_id=SELECTED_GENE_VARIANT.measurement_gene_variant_id FULL OUTER JOIN MEASUREMENT_GENE_PANEL on MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=MEASUREMENT_GENE_VARIANT.measurement_gene_panel_id where person_id=(SELECT person_id FROM PERSON WHERE person.hospital_number_hash = '"+hex_dig+"') and type='NGS' and baseline_number=1 and run_number=1 and MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=" +str(prev_gene_panel_id)+ " and gene_concept_id=(SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = '"+str(geneName)+"') and amino_acid_change = '"+str(amino)+"' and cdna_change ='"+str(cdna)+"';" findPreviousSelectionCursor = self.conn.cursor() findPreviousSelectionCursor.execute(findPreviouseSelectionQuery) row=findPreviousSelectionCursor.fetchone() findPreviousSelectionCursor.close() if row is not None: #was previously selected insertSGVarguments.append((hex_dig, measurement_gene_variant_id)) insertSelectionQuery="insert into SELECTED_GENE_VARIANT (person_id, measurement_gene_variant_id, type) values((SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s),%d,'NGS');" if len(insertSGVarguments)>0: insertSelectionCursor = self.conn.cursor() insertSelectionCursor.executemany(insertSelectionQuery, insertSGVarguments) insertSelectionCursor.close() except Exception as e: print(str(e)) print('Error importing genes.') self.log.logMessage('GDL -- Error importing genes.' +str(e)) raise e # Delete the copies self.log.logMessage('Deleting GDL xml data file '+filename) try: self.conn.commit() if self.file_service.exists(self.data, None, filename): self.file_service.delete_file(self.data, None, filename) self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Success') except: self.log.logMessage('There was a problem deleting '+filename) except Exception as e: self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Error: XML file could not be imported.') self.conn.rollback() print(str(e)) return 0 def getConfig(self): # Get the config file details (database login) d = {} with open(self.filepath+"/.config") as f: for line in f: (key, val) = line.split(';',1) d[key] = val.replace('\n', '').strip() return d def timestamp(self): #return datetime.datetime.now().isoformat() return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') def patientLookUp(self, christieNo, targetID): # Check if record already exists with open(self.filepath+"/.users", "a+") as f: patientExists = False; for line in f: # Split the line userData = line.split(':') print(self.decode(userData[1])) # Check the record if targetID == userData[0]: patientExists = True; # If not append it if patientExists == False: f.write(targetID+':'+str(self.encode(christieNo))+'\n') #print('Writing patient data...') #print(targetID+':'+str(self.encode(christieNo))) def encode(self, message): obj = AES.new(self.AKEY, AES.MODE_CFB, self.iv) return base64.urlsafe_b64encode(obj.encrypt(message)) def decode(self, cipher): obj2 = AES.new(self.AKEY, AES.MODE_CFB, self.iv) return obj2.decrypt(base64.urlsafe_b64decode(cipher)) def formatDate(self, dateString): #print(dateString) dateList = dateString.split('/') formattedDate = dateList[2]+'-'+dateList[1]+'-'+dateList[0] if len(dateList[2]) < 3: formattedDate = '20'+formattedDate return formattedDate TargetData()

clns-eTarget_ingest/targetdata.py (874 lines of code) (raw):