in mahout/src/main/java/com/twitter/elephantbird/pig/mahout/VectorWritableConverter.java [409:457]
/**
 * Builds a Mahout {@link Vector} from a Pig tuple holding sparse vector data.
 *
 * <p>Accepted input layouts, as read by this method:
 * <ul>
 * <li>two fields: field 0 is an {@code Integer} cardinality, field 1 is a {@code DataBag} of
 * entries. A cardinality configured on this converter, when present, takes precedence over the
 * one carried by the input.</li>
 * <li>any other field count: field 0 is the {@code DataBag} of entries and the converter's
 * configured cardinality is required.</li>
 * </ul>
 *
 * <p>Each entry is a tuple whose field 0 is an {@code Integer} index and whose field 1 is a
 * {@code Number} value. Entries whose index falls outside {@code [0, cardinality)} are dropped
 * and counted under {@code Counter.INDEX_OUT_OF_BOUNDS}.
 *
 * @param value tuple of sparse vector data in one of the layouts described above
 * @return a {@code DenseVector} when {@code dense} is set, otherwise a
 *         {@code RandomAccessSparseVector}; in either case copied into a
 *         {@code SequentialAccessSparseVector} when {@code sequential} is set
 * @throws IOException propagated from tuple field access or from entry validation
 * @throws NullPointerException if the input carries no cardinality and none is configured
 */
private Vector convertSparseVectorDataToVector(Tuple value) throws IOException {
  // resolve output cardinality and locate the bag of (index, value) entries
  final int dimension;
  final DataBag bag;
  if (value.size() != 2) {
    // input has no cardinality field, so the converter must supply one
    Preconditions.checkNotNull(cardinality, "Cardinality is undefined");
    dimension = cardinality;
    bag = (DataBag) value.get(0);
  } else {
    // input declares a cardinality; a converter-level cardinality overrides it
    final int declared = (Integer) value.get(0);
    dimension = (cardinality == null) ? declared : cardinality;
    bag = (DataBag) value.get(1);
  }

  // sparse input may be materialized densely on request; otherwise use the random access
  // sparse impl, which is the more efficient one to populate
  // TODO(Andy Schlaikjer): Test for OOM before it happens
  Vector result = dense ? new DenseVector(dimension) : new RandomAccessSparseVector(dimension);

  // copy in-range entries into the vector, counting the ones that fall out of bounds
  for (Tuple pair : bag) {
    validateSparseVectorEntryData(pair);
    final int index = (Integer) pair.get(0);
    if (index >= 0 && index < dimension) {
      result.setQuick(index, ((Number) pair.get(1)).doubleValue());
    } else {
      counterHelper.incrCounter(Counter.INDEX_OUT_OF_BOUNDS, 1);
    }
  }

  // hand back a (sparse) sequential access copy if requested
  return sequential ? new SequentialAccessSparseVector(result) : result;
}