in docker/services/storage_service.py [0:0]
def _download_metrics_s3(self, data_source: S3Storage, output_path,
scan_customer, scan_clouds, scan_tenants,
scan_from_date=None, scan_to_date=None,
max_days=None):
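"""Download metric and meta files for the requested scopes from S3.

Builds S3 key prefixes for the given customer, clouds and tenants,
lists the matching objects, narrows them down to metric CSV files and
meta files within the scan date range, and downloads them concurrently,
mirroring the bucket layout under ``output_path``.
"""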
access = data_source.access
prefix = access.prefix
bucket_name = access.bucket_name
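# Build per-scope S3 key prefixes to limit the listing to relevant objects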
paths = self._build_s3_paths(prefix=prefix,
scan_customer=scan_customer,
scan_clouds=scan_clouds,
scan_tenants=scan_tenants)
_LOG.debug(f'Listing objects in bucket \'{bucket_name}\' '
f'from paths: \'{paths}\'')
objects = []
if paths:
for path in paths:
files = self.s3_client.list_objects(bucket_name=bucket_name,
prefix=path)
if files:
objects.extend(files)
else:
files = self.s3_client.list_objects(bucket_name=bucket_name)
objects.extend(files)
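# Keep only metric CSV files and tenant meta files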
objects = [obj for obj in objects if
obj.get('Key').endswith(CSV_EXTENSION)
or obj.get('Key').endswith(f'/{META_FILE_NAME}')]
if not scan_from_date and max_days:
_LOG.debug(f'Scan start date is not specified. Going to use '
f'the algorithm limitation of {max_days} days')
scan_start_dt = datetime.utcnow() - timedelta(days=max_days)
scan_from_date = scan_start_dt.strftime(DATE_FORMAT)
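# Resolve the set of date folder names that fall within the scan range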
filter_only_dates = self.get_scan_dates_list(
scan_from_date=scan_from_date,
scan_to_date=scan_to_date
)
if filter_only_dates:
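# The date folder is the second-to-last component of the object key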
objects = [obj for obj in objects if obj['Key'].split('/')[-2]
in filter_only_dates]
_LOG.debug(f'{len(objects)} metric/meta files found, downloading')
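# Download the matched files concurrently with a bounded thread pool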
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = []
for file in objects:
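# Recreate the key's folder structure locally, stripping the configured prefix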
path = file.get('Key').split('/')
if path[0] == prefix:  # str.split never returns an empty list
path = path[1:]
path = '/'.join(path[:-1])
output_folder_path = '/'.join((output_path, path))
os.makedirs(output_folder_path, exist_ok=True)
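# Queue the download on the shared thread pool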
futures.append(executor.submit(
self.s3_client.download_file,
bucket_name=bucket_name,
full_file_name=file.get('Key'),
output_folder_path=output_folder_path
))
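# Exiting the 'with' block waits for all submitted downloads to finish;
# exceptions stay stored in the futures until result() is called.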