clns-eTarget_ingest/ihc_report.py (171 lines of code) (raw):

# # This file is part of the eTarget ingest distribution (https://github.com/digital-ECMT/eTarget_ingest). # Copyright (C) 2017 - 2021 digital ECMT # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, version 3. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # from openpyxl import Workbook from openpyxl import load_workbook from azure.storage.file import FileService from pycel import ExcelCompiler from datetime import datetime import sys import pymssql import utilities import io import re class IHC_Report: def __init__(self, filename, remotehostname, remoteusername, remotepassword, remotedbname, fileuser, filekey, datadir='data', logblob='log'): self.excelFile=filename self.host=remotehostname self.user=remoteusername self.pw=remotepassword self.db=remotedbname self.datadir=datadir self.file_service = FileService(account_name=fileuser, account_key=filekey) output_stream = io.BytesIO() self.file_service.get_file_to_stream(self.datadir, None, self.excelFile, output_stream) self.wb = load_workbook(output_stream) self.ws=self.wb["Report"] self.excel = ExcelCompiler(excel=self.wb) self.log = utilities.Util(remotehostname, remoteusername, remotepassword, remotedbname, fileuser, filekey, logblob) self.conn = pymssql.connect(self.host,self.user, self.pw, self.db, autocommit=False) def __del__(self): self.conn.close() def ingest(self): data={} target_id = self.ws['B1'].value data["person_id"] = self.queryPerson(target_id) data["specimen_id"] = self.querySpecimen(self.ws['B2'].value, data["person_id"]) # if self.checkRecordExists(data["specimen_id"])== True : # self.log.logMessage("Record exits for " + str(target_id)) # self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Warning: report exists') # return(None) resubmission=self.checkResubmission(data["specimen_id"]) # if resubmission[0]==2: # self.log.logMessage("Resubmission after patient being discussed " + str(target_id)) # self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: report cannot be overwritten') # return(None) if resubmission[0]==1: self.log.logMessage("Resubmission - overwrite data " + str(target_id)) self.deleteRecord(resubmission[1]) data["date_received"] = self.parseDate(self.ws['B3'].value) data["date_report"] = self.parseDate(self.ws['B4'].value) data["cd3_area"] = self.parseFloat(self.excel.evaluate('Report!B5')) data["cd3_tumoural"] = self.parseFloat(self.excel.evaluate('Report!B6')) data["cd3_stromal"] = self.parseFloat(self.excel.evaluate('Report!B7')) data["cd8_area"] = self.parseFloat(self.excel.evaluate('Report!B8')) data["cd8_tumoural"] = self.parseFloat(self.excel.evaluate('Report!B9')) data["cd8_stromal"] = self.parseFloat(self.excel.evaluate('Report!B10')) data["pdl1_tps"] = self.ws['B11'].value data["estimated_result"] = self.ws['B12'].value data["comments"] = self.ws['B13'].value print(str(data)) id=self.insertIHCRecord(data); if id>0: self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Success') self.commit() return(data) self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: Database commit not successful') return(None) def queryPerson(self, target_id): if len(target_id)==0 or len(target_id)>10: self.log.logMessage("Patient id of wrong size " + str(target_id)) self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: TargetID has wrong size.') raise Exception('Patient ID is empty or too long') query="SELECT person_id FROM PERSON where target_id='"+str(target_id)+"'" cursor=self.conn.cursor() cursor.execute(query) row=cursor.fetchone() if row is None: self.log.logMessage("Can't find Patient id " + str(target_id)) self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: Patient ID not found in database.') raise Exception('Cannot find Target ID') return(row[0]) def querySpecimen(self, specimentext, person_id): if len(specimentext)== 0: self.log.logMessage("SampleID is empty ") self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: SampleID empty') raise Exception('Sample ID is empty') pattern = re.compile("^[A-Z]{3}\d{7}Bx\d{1,2}IC") if not re.match(pattern, specimentext): self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(), 'Error: specimen ID is incompatible with the naming convention') self.log.logMessage('Error ingesting IHC data - sample ID has the wrong structure') raise Exception('Error sample id does not match pattern') timepoint=specimentext[10:] # pattern = re.compile("^T\d{1,2}(\D|$)") # if not re.match(pattern, timepoint): # self.log.systemStatusUpdate(self.filename, 'IHC', self.log.timestamp(), 'Error: specimen ID is incompatible with the naming convention - timepoint') # self.log.logMessage('Error ingesting IHC data - sample ID has the wrong structure - timepoint') # raise Exception('Error sample id does not match pattern') p2 = re.compile('\d{1,2}') t = p2.findall(timepoint)[0] query="select specimen_id, SPECIMEN.person_id FROM SPECIMEN left join PERSON on PERSON.person_id=SPECIMEN.person_id where specimen_concept_id!=1 and baseline_number="+t+" and target_id='"+specimentext[:10]+"'" print(query) cursor= self.conn.cursor() cursor.execute(query) row=cursor.fetchone() if row is None: self.log.logMessage("Sample not found " + specimentext) self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: Sample ID not found') raise Exception('Cannot find Specimen ID') if row[1]!=person_id: self.log.logMessage("Sample ID does not fit patient ") self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(),'Error: Sample ID does not fit patient') raise Exception('Specimen ID does not fit patient') return(row[0]) def parseDate(self, date): if date is None or len(date)==0: return 'NULL' return datetime.strptime(date,'%d%b%Y').strftime('%Y-%m-%d') def parseFloat(self, float): if isinstance(float,str): return('NULL') if float is None: return('NULL') return(float) def insertIHCRecord(self, data): insert="INSERT INTO IHC_REPORT (person_id, specimen_id, sample_received_date, report_date, cd3_total_tissue_area, cd3_intratumoural, cd3_intrastromal, cd8_total_tissue_area, cd8_intratumoural, cd8_intrastromal, pdl1_tps, estimated_results, comments) values("+str(data['person_id'])+", "+str(data['specimen_id'])+", '"+str(data['date_received'])+"', '"+str(data['date_report'])+"', "+str(data['cd3_area'])+"," + str(data['cd3_tumoural'])+", "+str(data['cd3_stromal'])+", "+str(data['cd8_area'])+", "+str(data['cd8_tumoural'])+", "+str(data['cd8_stromal'])+", '"+str(data['pdl1_tps'])+"', '" + str(data['estimated_result'])+"', '"+str(data['comments'])+"');" print(insert) cursor= self.conn.cursor() cursor.execute(insert) return cursor.lastrowid def checkRecordExists(self, specimen_id): select="select * from IHC_REPORT where specimen_id="+str(specimen_id) cursor=self.conn.cursor() cursor.execute(select) row=cursor.fetchone() if row is None: return(False) return(True) # checkResubmission IHC report def checkResubmission(self, specimen_id): checkSubmission = "select person_id, ingestion_date, ihc_report_id from IHC_REPORT where specimen_id="+str(specimen_id) cursorRun = self.conn.cursor() cursorRun.execute(checkSubmission) row = cursorRun.fetchone() cursorRun.close() if row is None: return 0,0 person_id=None if row[0] is not None: person_id=row[0] ingestion_date = row[1] ihc_report_id = row[2] lastReportSQL = "SELECT created_on FROM MEETING_OUTCOME as mo where mo.person_id="+str(person_id)+" ORDER BY created_on DESC;" cursor = self.conn.cursor() cursor.execute(lastReportSQL) row = cursor.fetchone(); cursor.close() if row is not None: lastDiscussedDate = row[0] if ingestion_date is not None: if lastDiscussedDate>=ingestion_date: return 2,ihc_report_id else: return 1,ihc_report_id return 1,ihc_report_id def deleteRecord(self, ihc_report_id): if ihc_report_id is None or type(ihc_report_id) is not int: return deleteIHCRecord = "delete from IHC_REPORT where ihc_report_id =" + str(ihc_report_id) cursor = self.conn.cursor() cursor.execute(deleteIHCRecord) def commit(self): self.conn.commit(); def deleteFile(self): try: if self.file_service.exists(self.datadir, None, self.excelFile): self.file_service.delete_file(self.datadir, None, self.excelFile) self.log.systemStatusUpdate(self.excelFile, 'IHC', self.log.timestamp(), 'Success') except: self.log.logMessage('There was a problem deleting '+self.excelFile)