public static ArrayList findColumnsToRead()

in rcfile/src/main/java/com/twitter/elephantbird/util/RCFileUtil.java [82:144]


  /**
   * Determines which stored columns have to be read in order to supply the
   * required fields.
   *
   * <p>The required fields are selected by the comma-separated list of indices
   * stored in the configuration under
   * {@code RCFileUtil.REQUIRED_FIELD_INDICES_CONF}. Each index is a position in
   * {@code currFieldIds}; whitespace around an index is ignored. When the
   * setting is missing or empty, every entry of {@code currFieldIds} is
   * required.
   *
   * <p>A stored column is selected when its field id is positive and is one of
   * the required field ids. If at least one required field id does not appear
   * among the stored field ids, the last stored column is selected as well; it
   * must carry the field id {@code -1} (the "unknowns" column).
   *
   * @param conf         configuration that may list the required field indices
   * @param currFieldIds field ids currently known to the reader
   * @param storedInfo   metadata providing the field id of each stored column
   * @return positions (into the stored field id list) of the columns to read,
   *         in ascending order, with the unknowns column last when it is needed
   * @throws IOException if a configured index is outside the range of
   *         {@code currFieldIds}, or if a required field is not stored and the
   *         input has no trailing unknowns column
   * @throws NumberFormatException if a configured index is not an integer
   */
  public static ArrayList<Integer> findColumnsToRead(
                                         Configuration      conf,
                                         List<Integer>      currFieldIds,
                                         ColumnarMetadata   storedInfo)
                                         throws IOException {

    ArrayList<Integer> columnsToRead = new ArrayList<Integer>();

    // first find the required fields
    ArrayList<Integer> requiredFieldIds = new ArrayList<Integer>();
    String reqFieldStr = conf.get(RCFileUtil.REQUIRED_FIELD_INDICES_CONF, "");

    int numKnownFields = currFieldIds.size();

    if (reqFieldStr == null || reqFieldStr.isEmpty()) {
      // no explicit selection : every known field is required
      requiredFieldIds.addAll(currFieldIds);
    } else {
      for (String str : reqFieldStr.split(",")) {
        // tolerate whitespace around the separators, e.g. "0, 2"
        int idx = Integer.parseInt(str.trim());
        if (idx < 0 || idx >= numKnownFields) {
          throw new IOException("idx " + idx + " is out of range for known fields");
        }
        requiredFieldIds.add(currFieldIds.get(idx));
      }
    }

    List<Integer> storedFieldIds = storedInfo.getFieldIdList();

    // Only positive ids are matched here; the unknowns column (id -1) is
    // handled separately below.
    for (int i = 0; i < storedFieldIds.size(); i++) {
      int sid = storedFieldIds.get(i);
      if (sid > 0 && requiredFieldIds.contains(sid)) {
        columnsToRead.add(i);
      }
    }

    // unknown fields : the required fields that are not listed in storedFieldIds
    StringBuilder unknownFields = new StringBuilder();
    for (int rid : requiredFieldIds) {
      if (!storedFieldIds.contains(rid)) {
        unknownFields.append(' ').append(rid);
      }
    }

    boolean hasUnknownFields = unknownFields.length() > 0;

    if (hasUnknownFields) {
      int last = storedFieldIds.size() - 1;
      LOG.info("unknown fields among required fileds :" + unknownFields);
      // An empty stored list has no unknowns column either; report it with the
      // same IOException instead of failing on get(-1).
      if (last < 0 || storedFieldIds.get(last) != -1) { // not expected
        throw new IOException("No unknowns column in in input");
      }
      columnsToRead.add(last);
    }

    LOG.info(String.format(
        "reading %d%s out of %d stored columns for %d required columns",
        columnsToRead.size(),
        (hasUnknownFields ? " (including unknowns column)" : ""),
        storedFieldIds.size(),
        requiredFieldIds.size()));

    return columnsToRead;
  }