def getNewFiles()

in clns-eTarget_ingest/targetdata.py [0:0]


    def getNewFiles(self):
        """Scan the remote share for new files and dispatch each to its handler.

        Every entry returned by ``self.file_service.list_directories_and_files``
        is examined; names starting with ``.`` are ignored.  Dispatch is by
        (case-insensitive) file extension:

        * ``.csv``  - first cell containing ``Patient_ID`` goes to
          ``processBloodFile``; first cell containing ``hospitalnumber``
          (any case) goes to ``clinicaldata.ClinicalData`` and the file is
          deleted when ``ingest()`` returns 0; anything else is logged and
          skipped.
        * ``.pdf``  - downloaded to ``<self.filepath>/tmp/<name>`` and handed
          to ``processPDFReport``.
        * ``.jpg``  - handed to ``processIHCPictures``.
        * ``.xml``  - content mentioning ``foundationmedicine.com`` goes to
          ``foundationmedicine.FoundationMedicine``; everything else goes to
          ``processGDLData``.
        * ``.xlsx`` - ``ihc_report.IHC_Report``; the file is deleted when
          ``ingest()`` returns something other than ``None``.
        * ``.json`` - content containing ``clinical`` goes to
          ``clinical_json.ClinicalDataJson``, content containing ``genomic``
          goes to ``genomicdata_json.GenomicDataJson``; each deletes the file
          when ``ingest()`` returns 0.  Other JSON is reported through the log.

        Any exception raised while handling one entry is logged and the loop
        moves on to the next entry.  Nothing is returned.

        NOTE(review): only CSV and JSON files are counted in the processed
        total, so 'No files to process.' is logged even when PDF/JPG/XML/XLSX
        files were handled - confirm whether that is intended.
        NOTE(review): Foundation Medicine files are deleted after ``ingest()``
        without checking its result, unlike the other ingesters - confirm.
        """
        print('')
        processedFiles = 0
        # Materialise the listing once: it is walked here and again for the
        # diagnostic message at the end, which a one-shot iterator would not
        # survive.
        files = list(self.file_service.list_directories_and_files(self.data))

        def ingestArgs(name):
            # Positional constructor arguments shared by every ingester class.
            # Built lazily so config keys are only read when an ingester is
            # actually created.
            return (name, self.config['remotehostname'], self.config['remoteusername'],
                    self.config['remotepassword'], self.config['remotedbname'],
                    self.config['fileuser'], self.config['filekey'],
                    self.data, self.config['logblob'])

        for entry in files:
            try:
                name = entry.name
                if name.startswith('.'):
                    # Hidden files are never processed.
                    continue
                lowered = name.lower()

                if lowered.endswith('.csv'):
                    try:
                        fileContent = self.file_service.get_file_to_text(self.data, None, name)
                    except Exception:
                        # Second attempt with an explicit Windows-1252 decoding.
                        fileContent = self.file_service.get_file_to_text(self.data, None, name, encoding="cp1252")
                    # Everything before the first comma identifies the CSV flavour.
                    firstCell = fileContent.content.split(',', 1)[0]

                    if 'Patient_ID' in firstCell:
                        # Blood sample report
                        self.log.logMessage('Processing blood report: '+name)
                        self.processBloodFile(name)
                        processedFiles += 1

                    elif 'hospitalnumber' in firstCell.lower():
                        # Christie patient data
                        self.log.logMessage('Processing Christie data: '+name)
                        cd = clinicaldata.ClinicalData(*ingestArgs(name))
                        if cd.ingest() == 0:
                            cd.deleteFile()
                        # Counted whether or not ingest() returned 0.
                        processedFiles += 1

                    else:
                        self.log.logMessage('Unknown CSV file: '+name)
                        self.log.logMessage('Skipping file: '+name)

                elif lowered.endswith('.pdf'):
                    pdf = self.file_service.get_file_to_bytes(self.data, None, name)
                    with open(self.filepath+'/tmp/'+name, 'wb') as f:
                        f.write(pdf.content)
                    # Process only after the handle is closed, so the buffered
                    # bytes are flushed and the temp file is complete on disk.
                    self.processPDFReport(name)

                elif lowered.endswith('.jpg'):
                    self.processIHCPictures(name)

                elif lowered.endswith('.xml'):
                    fileContent = self.file_service.get_file_to_text(self.data, None, name)
                    fileContentString = fileContent.content
                    if 'foundationmedicine.com' in fileContentString:
                        try:
                            fm = foundationmedicine.FoundationMedicine(*ingestArgs(name))
                            fm.ingest()
                            fm.deleteFile()
                        except Exception as e:
                            self.log.logMessage('Problems ingesting FM file ' + name + ' ' +str(e))
                    else:
                        self.processGDLData(name, fileContentString)

                elif lowered.endswith('.xlsx'):
                    ihc = ihc_report.IHC_Report(*ingestArgs(name))
                    ingested = ihc.ingest()
                    if ingested is not None:
                        ihc.deleteFile()

                elif lowered.endswith('.json'):
                    fileContent = self.file_service.get_file_to_text(self.data, None, name)
                    fileContentString = fileContent.content
                    if 'clinical' in fileContentString:
                        cd = clinical_json.ClinicalDataJson(*ingestArgs(name))
                        if cd.ingest() == 0:
                            cd.deleteFile()
                            processedFiles += 1
                    elif 'genomic' in fileContentString:
                        genomic = genomicdata_json.GenomicDataJson(*ingestArgs(name))
                        if genomic.ingest() == 0:
                            genomic.deleteFile()
                            processedFiles += 1
                    else:
                        self.log.logMessage('json file type not recognised ' + str(entry))
                        self.log.systemStatusUpdate(name, 'unknown', self.timestamp(), 'Error: JSON file type not recognised')

            except Exception as e:
                # One bad file must not stop the rest of the batch.
                self.log.logMessage('exception occurred ' + str(entry) + " " + str(e))

        if processedFiles == 0:
            self.log.logMessage('No files to process.')
            self.log.logMessage('File list:'+",".join(map(str, files)))