in clns-eTarget_ingest/targetdata.py [0:0]
def getNewFiles(self):
    """Scan the data share and hand every recognised file to its ingester.

    Entries returned by ``self.file_service.list_directories_and_files`` are
    dispatched on their (case-insensitive) extension; names starting with
    ``'.'`` are ignored:

    * ``.csv``  - first cell containing ``Patient_ID`` -> blood report,
      first cell containing ``hospitalnumber`` -> Christie clinical data,
      anything else is logged as unknown and skipped.
    * ``.pdf``  - downloaded to ``<self.filepath>/tmp/`` and passed to
      ``processPDFReport``.
    * ``.jpg``  - passed to ``processIHCPictures``.
    * ``.xml``  - Foundation Medicine ingest when the text mentions
      ``foundationmedicine.com``, otherwise ``processGDLData``.
    * ``.xlsx`` - IHC report ingest.
    * ``.json`` - clinical or genomic ingest depending on the text content.

    Any exception raised while handling one entry is logged and the loop
    moves on to the next entry. Nothing is returned.

    Only blood/Christie CSV files and clinical/genomic JSON files bump the
    processed counter, so 'No files to process.' can be logged even when
    PDF, JPG, XML or XLSX files were handled.
    """

    def ingester_args(name):
        # Positional arguments shared by every ingester class built below.
        cfg = self.config
        return (name, cfg['remotehostname'], cfg['remoteusername'],
                cfg['remotepassword'], cfg['remotedbname'],
                cfg['fileuser'], cfg['filekey'], self.data, cfg['logblob'])

    print('')
    processedFiles = 0
    # Materialise the listing once: it is walked here and again for the
    # summary log, which would come out empty/truncated for a one-shot or
    # paged iterator.
    files = list(self.file_service.list_directories_and_files(self.data))
    for file in files:
        try:
            name = file.name
            lowered = name.lower()
            if name.startswith('.'):
                # Hidden entries are never processed, whatever the extension.
                continue

            if lowered.endswith('.csv'):
                try:
                    fileContent = self.file_service.get_file_to_text(self.data, None, name)
                except Exception:
                    # Default read failed; retry decoding as cp1252.
                    fileContent = self.file_service.get_file_to_text(self.data, None, name, encoding="cp1252")
                # Only the first cell is needed to recognise the file type.
                header_cell = fileContent.content.split(',', 1)[0]
                if 'Patient_ID' in header_cell:
                    # Blood sample report
                    self.log.logMessage('Processing blood report: '+name)
                    self.processBloodFile(name)
                    processedFiles += 1
                elif 'hospitalnumber' in header_cell.lower():
                    # Christie patient data
                    self.log.logMessage('Processing Christie data: '+name)
                    cd = clinicaldata.ClinicalData(*ingester_args(name))
                    if cd.ingest() == 0:
                        cd.deleteFile()
                    # NOTE(review): counted whether or not ingest succeeded - confirm.
                    processedFiles += 1
                else:
                    self.log.logMessage('Unknown CSV file: '+name)
                    self.log.logMessage('Skipping file: '+name)

            elif lowered.endswith('.pdf'):
                pdf = self.file_service.get_file_to_bytes(self.data, None, name)
                with open(self.filepath+'/tmp/'+name, 'wb') as f:
                    f.write(pdf.content)
                # Called after the local copy has been closed.
                self.processPDFReport(name)

            elif lowered.endswith('.jpg'):
                self.processIHCPictures(name)

            elif lowered.endswith('.xml'):
                fileContent = self.file_service.get_file_to_text(self.data, None, name)
                fileContentString = fileContent.content
                if 'foundationmedicine.com' in fileContentString:
                    try:
                        fm = foundationmedicine.FoundationMedicine(*ingester_args(name))
                        fm.ingest()
                        # Unlike the other ingesters, the file is deleted
                        # without checking the ingest result.
                        fm.deleteFile()
                    except Exception as e:
                        self.log.logMessage('Problems ingesting FM file ' + name + ' ' +str(e))
                else:
                    self.processGDLData(name, fileContentString)

            elif lowered.endswith('.xlsx'):
                ihc = ihc_report.IHC_Report(*ingester_args(name))
                data = ihc.ingest()
                if data is not None:
                    ihc.deleteFile()

            elif lowered.endswith('.json'):
                fileContent = self.file_service.get_file_to_text(self.data, None, name)
                fileContentString = fileContent.content
                if 'clinical' in fileContentString:
                    cd = clinical_json.ClinicalDataJson(*ingester_args(name))
                    if cd.ingest() == 0:
                        cd.deleteFile()
                    # NOTE(review): counted whether or not ingest succeeded - confirm.
                    processedFiles += 1
                elif 'genomic' in fileContentString:
                    genomic = genomicdata_json.GenomicDataJson(*ingester_args(name))
                    if genomic.ingest() == 0:
                        genomic.deleteFile()
                    # NOTE(review): counted whether or not ingest succeeded - confirm.
                    processedFiles += 1
                else:
                    self.log.logMessage('json file type not recognised ' + str(file))
                    self.log.systemStatusUpdate(name, 'unknown', self.timestamp(), 'Error: JSON file type not recognised')
        except Exception as e:
            # One bad entry must not stop the rest of the listing.
            self.log.logMessage('exception occurred ' + str(file) + " " + str(e))

    if processedFiles == 0:
        self.log.logMessage('No files to process.')
        # NOTE(review): assumed to belong to the "nothing processed" branch - confirm.
        self.log.logMessage('File list:'+",".join(map(str, files)))