def get_explicit_datetime_inputs_files(explicit_datetime_inputs)

in projects/home/recap/data/dataset.py


def get_explicit_datetime_inputs_files(explicit_datetime_inputs):
  """
  Compile the list of files for training/validation.

  Used with DataConfigs that specify data via the `explicit_datetime_inputs` format.
  For each hour of data, if the corresponding directory is missing or empty, we increment a
  counter that tracks the number of missing data hours.
  Only files with a `.gz` extension are returned.

  Args:
    explicit_datetime_inputs: An `ExplicitDatetimeInputs` object within a `datasets.DataConfig` object

  Returns:
    data_files: Sorted list of files to read corresponding to data at the desired datetimes
    num_hours_missing: Number of hours for which data is missing

  """
  datetimes = get_datetimes(explicit_datetime_inputs)
  folders = [os.path.join(explicit_datetime_inputs.data_root, datetime) for datetime in datetimes]
  data_files = []
  num_hours_missing = 0
  for folder in folders:
    try:
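      # List the hour's directory via tf.io.gfile, which works for local and remote paths.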
      files = tf.io.gfile.listdir(folder)
      if not files:
        logging.warning(f"{folder} contained no data files")
        num_hours_missing += 1
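      # Keep only gzip-compressed (.gz) files from this hour's directory.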
      data_files.extend(
        [
          os.path.join(folder, filename)
          for filename in files
          if filename.rsplit(".", 1)[-1].lower() == "gz"
        ]
      )
    except tf.errors.NotFoundError as e:
      num_hours_missing += 1
      logging.warning(f"Cannot find directory {folder}. Missing one hour of data. Error: \n {e}")
  return sorted(data_files), num_hours_missing
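
A minimal usage sketch, assuming a `data_config` object whose `explicit_datetime_inputs` field matches the docstring above and reusing the module's `tf` and `logging` imports; the use of `tf.data.TFRecordDataset` further assumes the `.gz` files are gzip-compressed TFRecords, which this excerpt does not confirm.

data_files, num_hours_missing = get_explicit_datetime_inputs_files(
  data_config.explicit_datetime_inputs  # hypothetical config object, not part of this excerpt
)
if num_hours_missing > 0:
  logging.warning(f"Proceeding with {num_hours_missing} missing hour(s) of data.")
# Assumption: the .gz files are gzip-compressed TFRecords.
dataset = tf.data.TFRecordDataset(data_files, compression_type="GZIP")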