in tfx/components/example_gen/utils.py [0:0]
def verify_split_pattern_specs(
    split: example_gen_pb2.Input.Split) -> Tuple[bool, bool, bool]:
  """Validates the specs used in a split pattern and reports which are present.

  Args:
    split: Split whose `pattern` string is inspected.

  Returns:
    A tuple `(is_match_span, is_match_date, is_match_version)` telling whether
    the pattern contains a span spec, date specs and a version spec
    respectively.

  Raises:
    ValueError: If the pattern mixes a span spec with date specs, contains
      more than one span spec, does not contain each date spec exactly once
      when any date spec is used, contains a version spec without a span or
      date spec, or contains more than one version spec.
  """
  pattern = split.pattern

  # Collect every occurrence of each kind of spec found in the pattern.
  span_hits = re.findall(SPAN_FULL_REGEX, pattern)
  version_hits = re.findall(VERSION_FULL_REGEX, pattern)

  has_span = bool(span_hits)
  has_version = bool(version_hits)
  has_date = any(date_spec in pattern for date_spec in DATE_SPECS)

  # The checks below run in a fixed order so that a pattern violating several
  # rules always reports the same error.
  if has_span and has_date:
    raise ValueError(
        'Either span spec or date specs must be specified exclusively in %s' %
        pattern)

  if has_span and len(span_hits) > 1:
    raise ValueError('Only one %s is allowed in %s' % (SPAN_SPEC, pattern))

  # Once any date spec is used, every date spec must appear exactly once.
  if has_date and any(
      pattern.count(date_spec) != 1 for date_spec in DATE_SPECS):
    raise ValueError(
        'Exactly one of each date spec (%s, %s, %s) is required in %s' %
        (YEAR_SPEC, MONTH_SPEC, DAY_SPEC, pattern))

  # A version spec is only accepted alongside a span or date spec.
  if has_version and not (has_span or has_date):
    raise ValueError(
        'Version spec provided, but Span or Date spec is not present in %s' %
        pattern)

  if has_version and len(version_hits) > 1:
    raise ValueError('Only one %s is allowed in %s' % (VERSION_SPEC, pattern))

  return has_span, has_date, has_version