def verify_split_pattern_specs()

in tfx/components/example_gen/utils.py [0:0]


def verify_split_pattern_specs(
    split: example_gen_pb2.Input.Split) -> Tuple[bool, bool, bool]:
  """Validates the specs used in a split pattern and reports which are present.

  Args:
    split: The input split whose `pattern` string is inspected.

  Returns:
    A tuple `(is_match_span, is_match_date, is_match_version)` of booleans
    telling whether the pattern contains a span spec, any date spec, and a
    version spec, respectively.

  Raises:
    ValueError: If the pattern mixes span and date specs, contains more than
      one span spec, does not contain each date spec in DATE_SPECS exactly
      once when any date spec is used, contains a version spec without a span
      or date spec, or contains more than one version spec.
  """
  pattern = split.pattern

  # Each findall result holds one entry per '{SPAN}' / '{SPAN:*}' (resp.
  # '{VERSION}' / '{VERSION:*}') occurrence found in the pattern.
  span_hits = re.findall(SPAN_FULL_REGEX, pattern)
  version_hits = re.findall(VERSION_FULL_REGEX, pattern)
  # Number of times every date spec shows up in the pattern.
  date_spec_counts = [pattern.count(date_spec) for date_spec in DATE_SPECS]

  has_span = len(span_hits) > 0
  has_version = len(version_hits) > 0
  has_date = any(count > 0 for count in date_spec_counts)

  if has_span:
    # Span and date specs are mutually exclusive ways to identify a span.
    if has_date:
      raise ValueError(
          'Either span spec or date specs must be specified exclusively in %s' %
          pattern)
    if len(span_hits) != 1:
      raise ValueError('Only one %s is allowed in %s' % (SPAN_SPEC, pattern))
  elif has_date:
    # Once any date spec is used, every date spec must appear exactly once.
    if any(count != 1 for count in date_spec_counts):
      raise ValueError(
          'Exactly one of each date spec (%s, %s, %s) is required in %s' %
          (YEAR_SPEC, MONTH_SPEC, DAY_SPEC, pattern))

  if has_version:
    # A version spec is only accepted alongside a span or date spec.
    if not (has_span or has_date):
      raise ValueError(
          'Version spec provided, but Span or Date spec is not present in %s' %
          pattern)
    if len(version_hits) != 1:
      raise ValueError('Only one %s is allowed in %s' % (VERSION_SPEC, pattern))

  return has_span, has_date, has_version