public List expand()

in sdk/src/main/java/com/google/cloud/dataflow/sdk/util/GcsUtil.java [143:206]


  /**
   * Expands a GCS pattern into the list of object paths it matches.
   *
   * <p>If the object part of {@code gcsPattern} does not match {@code GLOB_PREFIX} it is treated
   * as a plain (non-glob) path and returned as the only element, without checking that the object
   * exists. Otherwise the bucket is listed page by page, restricted to names that start with the
   * part of the pattern before the first wildcard, and every listed object whose name matches the
   * regular expression produced by {@code globToRegexp} is returned. Names ending in {@code "/"}
   * are skipped.
   *
   * @param gcsPattern the path or glob pattern to expand; its object part must be accepted by
   *     {@code isGcsPatternSupported}
   * @return the matching paths; for a non-glob input, a single-element list holding
   *     {@code gcsPattern} itself
   * @throws IOException if a list request fails; the underlying failure is attached as the cause.
   *     If that failure is an {@link InterruptedException}, the calling thread's interrupt status
   *     is restored before this exception is thrown.
   */
  public List<GcsPath> expand(GcsPath gcsPattern) throws IOException {
    Preconditions.checkArgument(isGcsPatternSupported(gcsPattern.getObject()));
    Matcher m = GLOB_PREFIX.matcher(gcsPattern.getObject());
    if (!m.matches()) {
      // Not a glob.
      // Results of GCS storage list feature is only eventually consistent so we should not use that
      // feature to check the existence of single files.
      return ImmutableList.of(gcsPattern);
    }

    // Part before the first wildcard character.
    String prefix = m.group("PREFIX");
    Pattern p = Pattern.compile(globToRegexp(gcsPattern.getObject()));

    LOG.debug("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(),
        prefix, p.toString());

    // List all objects that start with the prefix (including objects in sub-directories).
    Storage.Objects.List listObject = storageClient.objects().list(gcsPattern.getBucket());
    listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL);
    listObject.setPrefix(prefix);

    String pageToken = null;
    List<GcsPath> results = new LinkedList<>();
    do {
      // The first request carries no token; later requests continue from the previous page.
      if (pageToken != null) {
        listObject.setPageToken(pageToken);
      }

      Objects objects;
      try {
        objects = ResilientOperation.retry(
            ResilientOperation.getGoogleRequestCallable(listObject),
            new AttemptBoundedExponentialBackOff(3, 200),
            RetryDeterminer.SOCKET_ERRORS,
            IOException.class);
      } catch (Exception e) {
        if (e instanceof InterruptedException) {
          // Wrapping the exception would otherwise hide the interruption from callers, so
          // re-assert the interrupt flag before translating it to an IOException.
          Thread.currentThread().interrupt();
        }
        throw new IOException("Unable to match files in bucket " + gcsPattern.getBucket()
            + ", prefix " + prefix + " against pattern " + p.toString(), e);
      }
      Preconditions.checkNotNull(objects);

      // A page without items ends the listing.
      if (objects.getItems() == null) {
        break;
      }

      // Filter objects based on the regex.
      for (StorageObject o : objects.getItems()) {
        String name = o.getName();
        // Skip directories, which end with a slash.
        if (p.matcher(name).matches() && !name.endsWith("/")) {
          LOG.debug("Matched object: {}", name);
          results.add(GcsPath.fromObject(o));
        }
      }

      pageToken = objects.getNextPageToken();
    } while (pageToken != null);

    return results;
  }