def processBloodFile()

in clns-eTarget_ingest/targetdata.py [0:0]


    def processBloodFile(self, filename):
        """Parse a blood-report CSV and pass the extracted data to the database layer.

        The file text is fetched through ``self.file_service.get_file_to_text``
        and read as comma-separated rows of the form ``label,value``.  Known
        labels populate the ``patientDetails`` dict; every row after the first
        20 is additionally stored verbatim (as the list produced by the CSV
        reader) in ``patientDetails['geneData']``, keyed 1, 2, 3, ...

        Blank rows and rows with a label but no value are tolerated: a missing
        value is treated as an empty string instead of raising ``IndexError``.

        Args:
            filename: Name of the CSV file to fetch and process.

        Returns:
            ``0`` when the visit/baseline value is missing or unknown, or when
            no run number was found (a status update is recorded in both
            cases).  ``None`` otherwise, after ``updateDatabaseBloodReport``
            has been called.
        """
        self.log.logMessage('Fetching file: '+filename)

        csvfile = self.file_service.get_file_to_text(self.data, None, filename)
        csvLines = csvfile.content.splitlines()

        # Parse once and keep the rows.  The plain ``excel`` dialect is exactly
        # what ``excel_tab`` with a ',' delimiter override amounts to.
        rows = list(csv.reader(csvLines, dialect=csv.excel))
        self.log.logMessage('Row count is: '+str(len(rows)))

        # Patient data from the file
        patientDetails = {
            'patientID': '',
            'baseline': '',
            'runNumber': '',
            'sampleType': '',
            'reportIssued': '',
            'ngsRun': '',
            'ngsSampleType': '',
            'pipelineVersion': '',
            'ngsLibraryCFDNAInput': '',
            'averageReadDepth': '',
            'colourGreenCFDNA': '',
            'colourYellowCFDNA': '',
            'colourRedCFDNA': '',
            'colourGreenReadDepth': '',
            'colourYellowReadDepth': '',
            'colourRedReadDepth': '',
            'detectionLevel': '',
            'ngsComment': '',
            'exploratoryComment': '',
            'geneData': {}
        }

        # Maps the visit code found in the file to its baseline number
        baselineNumberKey = {
            'Baseline1': 1,
            'B': 1,
            'Baseline2': 2,
            'B2': 2,
            'DT1': 3,
            'DT2': 4,
            'DT3': 5,
            'DT4': 6,
            'DT5': 7,
            'DT6': 8,
            'DT7': 9,
            'DT8': 10,
            'DT9': 11,
            'DT10': 12,
            'DT11': 13,
            'DT12': 14,
            'DT13': 15,
            'DP1': 16,
            'DP2': 17,
            'DP3': 18,
            'EoT': 19,
            'EoT2': 20,
            'EoT3': 21
        }

        # Labels matched by substring whose value is copied unchanged
        plainFields = (
            ('Date Report Issued', 'reportIssued'),
            ('NGS Run', 'ngsRun'),
            ('Bioinformatics Pipeline', 'pipelineVersion'),
            ('NGS Library cfDNA Input', 'ngsLibraryCFDNAInput'),
            ('Average Read Depth', 'averageReadDepth'),
            ('Colour Green cfDNA', 'colourGreenCFDNA'),
            ('Colour Yellow cfDNA', 'colourYellowCFDNA'),
            ('Colour Red cfDNA', 'colourRedCFDNA'),
            ('Colour Green Read Depth', 'colourGreenReadDepth'),
            ('Colour Yellow Read Depth', 'colourYellowReadDepth'),
            ('Colour Red Read Depth', 'colourRedReadDepth'),
            ('Detection Level', 'detectionLevel'),
        )

        # Free-text labels whose value has single quotes doubled
        # NOTE(review): presumably because the text is later embedded in a SQL
        # string literal -- confirm against updateDatabaseBloodReport.
        commentFields = (
            ('Comments for NGS Subset', 'ngsComment'),
            ('Comment for Exploratory Subset', 'exploratoryComment'),
        )

        # Rows beyond this count are collected as gene rows
        headerRowCount = 20

        for rowNumber, row in enumerate(rows, start=1):
            # Blank rows carry no label; they are still counted (and stored as
            # gene rows when past the header) but must not be indexed.
            if row:
                label = row[0]
                value = row[1] if len(row) > 1 else ''

                # Get the patient ID
                if 'Patient_ID' in label:
                    patientDetails['patientID'] = value

                # Get the baseline number
                if 'Visit' in label:
                    try:
                        if value.startswith('NT'):
                            # Drop the three-character 'NT?' prefix, in the row too
                            value = value[3:]
                            row[1] = value

                        # Only the first visit row sets the baseline
                        if patientDetails['baseline'] == '':
                            patientDetails['baseline'] = baselineNumberKey[value]

                        print('Baseline number is: '+str(patientDetails['baseline']))
                    except KeyError as e:
                        self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Baseline missing or incorrect')
                        print(str(e))
                        return 0

                # Get the run number (exact match so 'NGS Run' is not picked up)
                if label == 'Run':
                    patientDetails['runNumber'] = value

                # Get the sample types; 'Sample Type' is a substring of
                # 'NGS Sample Type', so the NGS row must be tested first or it
                # would overwrite the plain sample type.
                if 'NGS Sample Type' in label:
                    patientDetails['ngsSampleType'] = value
                elif 'Sample Type' in label:
                    patientDetails['sampleType'] = value

                for fieldLabel, fieldKey in plainFields:
                    if fieldLabel in label:
                        patientDetails[fieldKey] = value

                for fieldLabel, fieldKey in commentFields:
                    if fieldLabel in label:
                        patientDetails[fieldKey] = value.replace("'", "''")

            # Get the gene rows as lists, numbered from 1
            if rowNumber > headerRowCount:
                patientDetails['geneData'][rowNumber - headerRowCount] = row

        if not patientDetails['runNumber']:
            self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), 'Error: Run number missing')
            return 0

        # Check whether this is a re-submit.  A result of 1 means the old report
        # is deleted before the new one is stored; a result of 2 is not rejected
        # here but forwarded as a flag to updateDatabaseBloodReport.
        resubmission, prev_gene_panel_id = self.checkResubmission(patientDetails)
        print('Resubmission: ' + str(resubmission))
        if resubmission == 1:
            self.deleteOldBloodReport(patientDetails)
            self.log.systemStatusUpdate(filename, 'CEP', self.timestamp(), "Resubmit of file -- delete old content")
            self.log.logMessage(filename + ' Resubmit of file -- delete old content')
        self.updateDatabaseBloodReport(patientDetails, filename, (resubmission==2), prev_gene_panel_id)