public Tuple getNext()

in pig/src/main/java/com/twitter/elephantbird/pig/load/LzoBaseRegexLoader.java [48:100]


  /**
   * Reads lines from the underlying reader until one matches the pattern
   * supplied by {@code getPattern()}, and converts that line's capture groups
   * into a tuple.
   *
   * <p>The tuple has one field per capture group (group 1 is stored at index 0).
   * A group that did not participate in the match is stored as an empty string.
   * The {@code MatchedRegexLines} / {@code UnmatchedRegexLines} counters are
   * incremented for every line examined.
   *
   * @return the tuple for the next matching line, or {@code null} if the reader
   *         is not set, the reader has no more records, or the reader yields a
   *         {@code null} value
   * @throws IOException if reading fails; an interruption while reading is
   *         reported as an {@code ExecException} with error code 6018, after
   *         the thread's interrupt flag has been restored
   */
  public Tuple getNext() throws IOException {
    if (reader == null) {
      return null;
    }

    // A single Matcher is reused for every line; reset() rebinds it to new input.
    Matcher matcher = getPattern().matcher("");

    try {
      // Read lines until a match is found. The reader decides when there are
      // no more records to hand out.
      while (reader.nextKeyValue()) {
        Object lineObj = reader.getCurrentValue();
        if (lineObj == null) {
          break;
        }

        String line = lineObj.toString();
        matcher.reset(line);

        if (matcher.find()) {
          incrCounter(LzoBaseRegexLoaderCounters.MatchedRegexLines, 1L);

          int groupCount = matcher.groupCount();
          Tuple t = tupleFactory_.newTuple(groupCount);
          for (int i = 1; i <= groupCount; i++) {
            // Groups that did not participate in the match come back as null;
            // they are emitted as empty strings.
            String group = matcher.group(i);
            t.set(i - 1, group != null ? group : "");
          }
          return t;
        }

        incrCounter(LzoBaseRegexLoaderCounters.UnmatchedRegexLines, 1L);
        // Guarded so the message is only concatenated when debug logging is
        // on; building it for every unmatched line can slow down the job.
        if (LOG.isDebugEnabled()) {
          LOG.debug("No match for line " + line);
        }
      }
    } catch (InterruptedException e) {
      // Restore the interrupt flag so code further up the stack can still
      // observe that this thread was interrupted.
      Thread.currentThread().interrupt();
      int errCode = 6018;
      String errMsg = "Error while reading input";
      throw new ExecException(errMsg, errCode,
          PigException.REMOTE_ENVIRONMENT, e);
    }

    return null;
  }