in deploy/docker/cp-tools/research/wsi-parser/process_files.py [0:0]
def _collect_tags(self, vsi_file, tag_prefix):
    """Parse one metadata block of a VSI file and record user-defined tags.

    Parsing starts at the current position of ``vsi_file``. The block header
    is read with ``read_metadata_block_header``; then up to ``tag_count`` tag
    records are visited. Every record starts with four values read through
    ``_read_int`` (field type, tag id, offset of the next field, data size),
    followed by one more int when the "extra tag" flag is set.

    Depending on the record:

    * extended ``NEW_VOLUME_HEADER`` records are delegated to
      ``extract_new_volume_header``;
    * extended ``PROPERTY_SET_VOLUME`` / ``NEW_MDIM_VOLUME_HEADER`` records
      are parsed recursively, with the tag id pushed onto ``self.parents``
      for the duration of the nested parse;
    * any other record has its value decoded according to its type. Values of
      ``USER_DEFINED_ITEM`` tags go to ``_push_to_user_defined_meta`` and
      values of ``USER_DEFINED_SPECIES`` tags go to
      ``_push_to_results('Species', ...)``.

    Args:
        vsi_file: seekable file object positioned at the start of a metadata
            block (presumably opened in binary mode -- confirm with caller).
        tag_prefix: name forwarded unchanged to nested ``PROPERTY_SET_VOLUME``
            blocks; for ``NEW_MDIM_VOLUME_HEADER`` it is replaced by
            ``_get_volume_name(tag)``. It is not used otherwise here.

    Returns:
        ``False`` when the block cannot be read (header would reach the end
        of the file, tag count is below 1 or above the file length, data
        offset points outside the file), when a negative tag id is met, or
        when the terminating record of the block was processed. ``True``
        when the loop finished without hitting one of those conditions.
    """
    metadata_block_start_position = vsi_file.tell()
    file_length = self._get_file_length()

    # The header must fit strictly before the end of the file.
    if metadata_block_start_position + self.HEADER_BYTES_SIZE >= file_length:
        return False

    data_field_offset, tag_count = self.read_metadata_block_header(vsi_file)
    # A tag count larger than the file itself cannot be valid.
    if tag_count > file_length or tag_count < 1:
        return False

    metadata_block_data_position = metadata_block_start_position + data_field_offset
    if not 0 <= metadata_block_data_position < file_length:
        return False
    vsi_file.seek(metadata_block_data_position)

    for _ in range(tag_count):
        field_type = self._read_int(vsi_file)
        tag = self._read_int(vsi_file)
        # Mask to 32 bits so the offset is always treated as unsigned.
        next_field = self._read_int(vsi_file) & 0xffffffff
        data_size = self._read_int(vsi_file)

        # Flag bits 27, 28 and 30 of the field type; the low 24 bits hold
        # the actual type code.
        extra_tag = bool(field_type & 0x8000000)
        extended_field = bool(field_type & 0x10000000)
        inline_data = bool(field_type & 0x40000000)
        real_type = field_type & 0xffffff

        if extra_tag:
            # Consume the additional int; its value is not used.
            self._read_int(vsi_file)

        if tag < 0:
            # Step over the payload (when it is not inline and fits in the
            # file) and stop parsing this block.
            if not inline_data and data_size + vsi_file.tell() < file_length:
                self._skip_bytes(vsi_file, data_size)
            return False

        if extended_field and real_type == self.NEW_VOLUME_HEADER:
            self.extract_new_volume_header(data_size, file_length, tag, vsi_file)
        elif extended_field and real_type in (self.PROPERTY_SET_VOLUME,
                                              self.NEW_MDIM_VOLUME_HEADER):
            if real_type == self.NEW_MDIM_VOLUME_HEADER:
                tag_name = self._get_volume_name(tag)
            else:
                tag_name = tag_prefix
            self.parents.append(tag)
            try:
                # The nested result is intentionally ignored, as before.
                self._collect_tags(vsi_file, tag_name)
            finally:
                # Always restore the parent stack, even if the nested parse
                # raises, so later records are not attributed to a stale path.
                self.parents.pop()
        else:
            # For inline records the data-size field itself is the value.
            value = str(data_size) if inline_data else ''
            if not inline_data and data_size > 0:
                # Independent checks (not elif) to keep the original read
                # sequence exactly.
                if real_type in (self.CHAR, self.UCHAR):
                    # NOTE(review): on Python 3, str() of a bytes object is
                    # its repr (e.g. "b'A'") -- confirm this is intended.
                    value = str(vsi_file.read(1))
                if real_type in (self.SHORT, self.USHORT):
                    value = str(self._read_short(vsi_file))
                if real_type in (self.INT, self.UINT, self.DWORD,
                                 self.FIELD_TYPE, self.MEM_MODEL,
                                 self.COLOR_SPACE):
                    value = str(self._read_int(vsi_file))
                if real_type in (self.LONG, self.ULONG, self.TIMESTAMP):
                    value = str(self._read_long(vsi_file))
                if real_type == self.FLOAT:
                    value = str(self._read_float(vsi_file))
                if real_type in (self.DOUBLE, self.DATE):
                    value = str(self._read_double(vsi_file))
                if real_type == self.BOOLEAN:
                    value = str(self._read_bool(vsi_file))
                if real_type in (self.TCHAR, self.UNICODE_TCHAR):
                    value = self._read_string(vsi_file, data_size)

            if tag == self.USER_DEFINED_ITEM:
                # Pass a copy so later push/pop on self.parents cannot
                # alter the stored path.
                self._push_to_user_defined_meta(list(self.parents), value)
            if tag == self.USER_DEFINED_SPECIES:
                self._push_to_results('Species', value)

        # A zero next-field offset ends the block.
        # NOTE(review): -494804095 is presumably an end-of-block marker tag
        # and 32 a fixed trailer size -- neither is defined in this method;
        # confirm against the format reference.
        if next_field == 0 or tag == -494804095:
            block_end = metadata_block_start_position + data_size
            if block_end + 32 < file_length and block_end >= 0:
                vsi_file.seek(block_end + 32)
            return False

        # Next-field offsets are relative to the start of this block.
        next_fp_position = metadata_block_start_position + next_field
        if not 0 <= next_fp_position < file_length:
            break
        vsi_file.seek(next_fp_position)

    return True