def processGDLData()

in clns-eTarget_ingest/targetdata.py [0:0]


    def processGDLData(self, filename, xmlString):
        # Parse the xml string into a usable format
        try:
            # Get the patient data
            root = ET.fromstring(xmlString)
            assert root.find('.//identifier[@type="Your ref"]') is not None, 'identifier "Your ref" missing'
            christieNo = root.find('.//identifier[@type="Your ref"]').text
            assert root.find('.//identifier[@type="Our ref"]') is not None, 'identifier "Our ref" missing'
            reportRef = root.find('.//identifier[@type="Our ref"]').text
            
            assert root.find('.//submitted') is not None, 'report submitted date missing'
            reportDate = root.find('.//submitted').text
            assert root.find('.//activated') is not None, 'report activated date missing'
            gdlDate = root.find('.//activated').text
            
            reportDateFormatted = datetime.datetime.strptime(reportDate, '%d-%b-%Y').strftime('%Y-%m-%d')

            assert root.find('.//identifier') is not None, "Path lab can't be missing or empty"
            pathLabRef = root.find('.//identifier[@type="Path lab"]').text

            coverage = 0
            if root.find('.//coverage') is not None:
                coverage = root.find('.//coverage').text
                coverage = coverage.replace("%", "")

            comments = 'n/a'
            if root.find('.//comments') is not None:
                comments = root.find('.//comments').text

            comment = 'n/a'
            if root.find('.//comment') is not None:
                comment = root.find('.//comment').text

            # Update the database, measurement_gene_panel table
            cursor = self.conn.cursor()

            # Get the specimen ID for this patient
            hash_object = hashlib.sha256(christieNo.encode('utf-8'))
            hex_dig = hash_object.hexdigest()
            
            print("hex_dig " + str(hex_dig));
            #gdlSQL = "SELECT specimen.specimen_id FROM SPECIMEN AS specimen, PERSON AS person WHERE person.hospital_number_hash = '"+hex_dig+"'"
            gdlSQL = """SELECT TOP 1 specimen.specimen_id FROM SPECIMEN AS specimen left join PERSON AS person on specimen.person_id= person.person_id 
            WHERE specimen.person_id = (SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s) AND (specimen.specimen_concept_id = 2 OR specimen.specimen_concept_id = 3) 
            AND dbo.RemoveNonAlphaNumChars(tumour_id) like dbo.RemoveNonAlphaNumChars(%s) order by specimen.specimen_id asc"""
            cursor.execute(gdlSQL, (hex_dig, pathLabRef))
            #print(gdlSQL)

            specimenID = 0
            for row in cursor:
                specimenID = row[0]

            print('SPECIMEN ID IS: '+str(specimenID))

            if specimenID == 0:
                # No specimen found
                self.log.logMessage('No specimen ID found for '+filename)
                raise Exception('No specimen ID found.')

            else:
                #check resubmission tumour
                resubmission,prev_gene_panel_id=self.checkResubmissionTumour(specimenID, 1, 1)
                print("resubmission: " + str(resubmission) + "prev_gene_panel: " + str(prev_gene_panel_id))
                updateSelection=False
                if resubmission==1:
                    self.deleteOldTumourReport(prev_gene_panel_id);
                    self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), "Resubmit of file -- delete old content")
                    self.log.logMessage(filename + ' Resubmit of file -- delete old content')
                if resubmission==2:
                    updateSelection=True
                
                #find the latest GDL panel id
                cursor = self.conn.cursor()
                genePanelSQL= "select top 1 data_source_concept_id from CONCEPT_DATA_SOURCES where valid_start_date<%s order by valid_start_date desc"
                cursor.execute(genePanelSQL, (reportDateFormatted))
                row=cursor.fetchone()
                if row is not None:
                    data_source_concept_id=row[0]
                else:
                    data_source_concept_id=2
                
                # Update MEASUREMENT_GENE_PANEL
                cursor = self.conn.cursor()
                gdlSQL = """INSERT INTO MEASUREMENT_GENE_PANEL (specimen_id, data_source_concept_id, coverage, average_read_depth, ngs_run, analysis_failed, no_mutations_found, run_number, baseline_number, comments, unknown_significance) 
                VALUES(%s, %s, %s, 'n/a', 'GDL', 'false', 'false', 1, 1, %s, %s)"""
                #print(gdlSQL)
                cursor.execute(gdlSQL, (specimenID, data_source_concept_id, coverage, comments, comment))
                measurement_gene_panel_id = cursor.lastrowid

                #print(gdlSQL)

                # Update REPORT
                cursor = self.conn.cursor()
                reportFilename = reportRef+'.pdf'
                reportSQL = "INSERT INTO REPORT (date_issued, measurement_gene_panel_id, filename) VALUES(%s, %d, %s)"
                cursor.execute(reportSQL, (reportDateFormatted, measurement_gene_panel_id, reportFilename))

                #print(reportSQL)

                # Update PATH_LAB_REF
                cursor = self.conn.cursor()
                pathSQL = "INSERT INTO PATH_LAB_REF (path_lab_ref, specimen_id, measurement_gene_panel_id) VALUES(%s, %d, %d)"
                #print(pathSQL)
                cursor.execute(pathSQL, (pathLabRef, specimenID, measurement_gene_panel_id))

                # Update GDL_REQUEST
                cursor = self.conn.cursor()
                gdlDateFormatted = datetime.datetime.strptime(gdlDate, '%d-%b-%Y').strftime('%Y-%m-%d')

                #print(gdlDate)
                #print(gdlDateFormatted)

                #reportSQL = "INSERT INTO GDL_REQUEST (date_requested, path_lab_ref, specimen_id, sample_type) VALUES('"+gdlDateFormatted+"', '"+pathLabRef+"', '"+str(specimenID)+"', '')"
                #reportSQL = "UPDATE GDL_REQUEST SET date_requested='"+gdlDateFormatted+"', path_lab_ref='"+pathLabRef+"' WHERE specimen_id='"+str(specimenID)+"'"
                #print(reportSQL)
                #cursor.execute(reportSQL)

                # Get the gene results data and update MEASUREMENT_GENE_VARIANT
                geneCount = 0
                try:
                    insertSGVarguments=[]
                    for gene in root.iter(tag = 'gene'):
                        geneName = gene.attrib['name']

                        for variant in gene[0]:
                            cdna = variant[0].text
                            if variant.find('protein') is not None:
                                amino = variant.find('protein').text
                            else:
                                amino = ""
                            if variant.find('reads') is not None:
                                reads = variant.find('reads').text
                                reads = reads.replace("%", "")
                            else:
                                reads = ""

                            cursor = self.conn.cursor()
                            gdlSQL = "INSERT INTO MEASUREMENT_GENE_VARIANT (measurement_gene_panel_id, gene_concept_id, cdna_change, amino_acid_change, variant_allele_frequency) VALUES(%d, (SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = %s), %s, %s, %s)"
                            cursor.execute(gdlSQL,(measurement_gene_panel_id, geneName, cdna, amino, reads))
                            measurement_gene_variant_id = cursor.lastrowid
                                                
                            # new version of old tumour report keep selections
                            if updateSelection == True:
                                findPreviouseSelectionQuery="select person_id from SELECTED_GENE_VARIANT FULL OUTER JOIN MEASUREMENT_GENE_VARIANT on MEASUREMENT_GENE_VARIANT.measurement_gene_variant_id=SELECTED_GENE_VARIANT.measurement_gene_variant_id FULL OUTER JOIN MEASUREMENT_GENE_PANEL on MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=MEASUREMENT_GENE_VARIANT.measurement_gene_panel_id where person_id=(SELECT person_id FROM PERSON WHERE person.hospital_number_hash = '"+hex_dig+"') and type='NGS' and baseline_number=1 and run_number=1 and MEASUREMENT_GENE_PANEL.measurement_gene_panel_id=" +str(prev_gene_panel_id)+ " and gene_concept_id=(SELECT TOP 1 gene_concept_id FROM CONCEPT_GENE WHERE gene_name = '"+str(geneName)+"') and amino_acid_change = '"+str(amino)+"' and cdna_change ='"+str(cdna)+"';"
                                findPreviousSelectionCursor = self.conn.cursor()
                                findPreviousSelectionCursor.execute(findPreviouseSelectionQuery)
                                row=findPreviousSelectionCursor.fetchone()
                                findPreviousSelectionCursor.close()
                                if row is not None:
                                    #was previously selected
                                    insertSGVarguments.append((hex_dig, measurement_gene_variant_id))
                    insertSelectionQuery="insert into SELECTED_GENE_VARIANT (person_id, measurement_gene_variant_id, type) values((SELECT person_id FROM PERSON WHERE person.hospital_number_hash = %s),%d,'NGS');"
                    if len(insertSGVarguments)>0:
                        insertSelectionCursor = self.conn.cursor()
                        insertSelectionCursor.executemany(insertSelectionQuery, insertSGVarguments)
                        insertSelectionCursor.close()
                except Exception as e:
                    print(str(e))
                    print('Error importing genes.')
                    self.log.logMessage('GDL -- Error importing genes.' +str(e))
                    raise e

                # Delete the copies
                self.log.logMessage('Deleting GDL xml data file '+filename)
                try:
                    self.conn.commit()
                    if self.file_service.exists(self.data, None, filename):
                        self.file_service.delete_file(self.data, None, filename)
                    self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Success')
                except:
                    self.log.logMessage('There was a problem deleting '+filename)
        except Exception as e:
            self.log.systemStatusUpdate(filename, 'GDL', self.timestamp(), 'Error: XML file could not be imported.')
            self.conn.rollback()
            print(str(e))
            return 0