in rcfile/src/main/java/com/twitter/elephantbird/mapreduce/output/RCFileThriftOutputFormat.java [154:207]
/**
 * Splits one serialized Thrift struct into per-column byte slices.
 *
 * <p>The record is walked field by field without being deserialized into
 * an object. For each field the value is skipped and its byte range noted:
 * <ul>
 *   <li>if the field id is present in {@code idMap} and the wire type
 *       equals the type of the matching {@code tFields} entry, the mapped
 *       entry of {@code colValRefs} is pointed at the value bytes inside
 *       the input buffer (field header excluded, no copy);</li>
 *   <li>otherwise (id not mapped, or type mismatch) the field header and
 *       value bytes are appended to {@code byteStream}.</li>
 * </ul>
 * When at least one byte was appended to {@code byteStream}, a
 * {@code TType.STOP} byte is added and the last entry of
 * {@code colValRefs} is pointed at the accumulated bytes.
 *
 * <p>The transport and protocol are created on the first call, right
 * after {@code initIdMap()}, and reused afterwards.
 *
 * @param bytesWritable holder of the serialized struct; only the first
 *        {@code getLength()} bytes of its backing array are read
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 * @throws TException if the Thrift protocol fails while reading a field
 *         header or skipping a field value
 */
private void fromBytes(BytesWritable bytesWritable)
    throws IOException, InterruptedException, TException {
  // First call: build the id map, then the reusable transport/protocol.
  if (mTransport == null) {
    initIdMap();
    mTransport = new TMemoryInputTransport();
    skipProto = new TBinaryProtocol(mTransport);
  }

  final byte[] payload = bytesWritable.getBytes();
  final int payloadLength = bytesWritable.getLength();
  mTransport.reset(payload, 0, payloadLength);
  byteStream.reset();

  // Every column starts as a zero-length slice, i.e. "field not set".
  for (int col = 0; col < colValRefs.length; col++) {
    colValRefs[col].set(payload, 0, 0);
  }

  skipProto.readStructBegin();

  // headerPos marks where the current field's header begins, so that an
  // unrecognized field can be copied together with its id and type.
  int headerPos = mTransport.getBufferPosition();
  TField field = skipProto.readFieldBegin();
  while (field.type != TType.STOP) {
    final int valuePos = mTransport.getBufferPosition();

    // skip still creates and copies primitive objects (String, buffer, etc)
    // skipProto could override readString() and readBuffer() to avoid that.
    TProtocolUtil.skip(skipProto, field.type);
    final int endPos = mTransport.getBufferPosition();

    final Integer column = idMap.get(field.id);
    final boolean recognized =
        column != null && field.type == tFields.get(column).getType();

    if (recognized) {
      // Known field: reference the value bytes in place.
      colValRefs[column].set(payload, valuePos, endPos - valuePos);
    } else {
      // Unknown field: keep header + value for the last column.
      byteStream.write(payload, headerPos, endPos - headerPos);
    }

    headerPos = mTransport.getBufferPosition();
    field = skipProto.readFieldBegin();
  }

  if (byteStream.getCount() <= 0) {
    return; // no unknown fields were collected
  }

  // Terminate the collected fields and expose them through the last column.
  byteStream.write(TType.STOP);
  final BytesRefWritable lastColumn = colValRefs[colValRefs.length - 1];
  lastColumn.set(byteStream.getData(), 0, byteStream.getCount());
}