in src/main/python/s3mper.py [0:0]
def checked_listing(self, s3_listing, path):
    """ Checks the S3 listing against the metastore listing.

    All attempts are made to use the lazy listing returned by
    ``s3_listing()`` if a check isn't necessary, but if a check must be
    made the whole listing for both the metastore and S3 needs to be
    pulled into memory.

    Args:
        s3_listing: zero-argument callable returning an iterable of
            listing entries. Entries are either ``Key`` instances or
            values directly comparable to the metastore URLs.
        path: path handed to ``self.list`` to obtain the expected entries.

    Returns:
        The iterable produced by ``s3_listing()`` when no check is
        performed (checking disabled or nothing expected); otherwise the
        fully materialized list of S3 entries.

    Raises:
        S3ConsistencyException: if expected paths are missing from the
            S3 listing and ``self.fail_on_error`` is set.
    """
    if self.disabled:
        # Call the listing function so every path hands back an iterable
        # listing, never the callable itself.
        return s3_listing()

    expected = set(p.url for p in self.list(path))

    if not expected:
        # Nothing recorded for this path, so there is nothing to verify.
        return s3_listing()

    # This isn't ideal since we are sucking in the whole listing
    # to perform the check, but if we check on-the-fly, processing
    # could be partially complete before inconsistency is detected
    listing = list(s3_listing())

    for entry in listing:
        if isinstance(entry, Key):
            # Key objects are normalized to the URL form used in `expected`.
            url = 's3://%s/%s' % (entry.bucket, entry.name)
        else:
            url = entry
        expected.discard(url)

    if expected:
        # Whatever is left was expected but never showed up in S3.
        logger.error("Failed consistency check. Missing file count %d. Missing paths: %s" % (len(expected), expected))
        self.__send_alert(expected)

        if self.fail_on_error:
            raise S3ConsistencyException(expected)

    # When not failing on error, still hand back what S3 reported so the
    # caller always receives a listing instead of None.
    return listing