private Vector convertSparseVectorDataToVector()

in mahout/src/main/java/com/twitter/elephantbird/pig/mahout/VectorWritableConverter.java [409:457]


  /**
   * Builds a Mahout {@code Vector} from a Pig tuple holding sparse vector data.
   *
   * <p>A two-field tuple is read as {@code (cardinality, entries)}; a tuple of any other size is
   * read as {@code (entries)}. The size of the output vector is this converter's
   * {@code cardinality} when it is set, otherwise the cardinality carried by the two-field tuple.
   * The tuple's own cardinality field is only read when it is actually needed, so a null value
   * there is harmless whenever the converter's {@code cardinality} is set.
   *
   * <p>Each entry is validated with {@code validateSparseVectorEntryData}, then read as an
   * {@code Integer} index (field 0) and a {@code Number} value (field 1). Entries whose index lies
   * outside {@code [0, size)} are skipped and counted under {@code Counter.INDEX_OUT_OF_BOUNDS}.
   *
   * @param value tuple of the form {@code (cardinality, entries)} or {@code (entries)}
   * @return a {@code DenseVector} when {@code dense} is set, otherwise a
   *         {@code RandomAccessSparseVector}; either one is copied into a
   *         {@code SequentialAccessSparseVector} when {@code sequential} is set
   * @throws IOException propagated from tuple field access or entry validation
   * @throws NullPointerException if neither this converter nor the input tuple supplies a
   *         cardinality
   */
  private Vector convertSparseVectorDataToVector(Tuple value) throws IOException {
    // a two-field tuple carries its own cardinality ahead of the bag of entries
    final boolean inputHasCardinality = value.size() == 2;

    // determine output vector size
    final int size;
    if (cardinality != null) {
      // cardinality defined by VectorWritableConverter instance always takes precedence, so the
      // input's cardinality field is deliberately not read (it may be null)
      size = cardinality;
    } else {
      // cardinality must be defined by input
      Object inputCardinality = inputHasCardinality ? value.get(0) : null;
      size = (Integer) Preconditions.checkNotNull(inputCardinality, "Cardinality is undefined");
    }

    // fetch bag containing entries from input
    final DataBag entries = (DataBag) value.get(inputHasCardinality ? 1 : 0);

    // create vector, allowing conversion of sparse input vector data to dense output vector
    Vector v;
    if (dense) {
      // TODO(Andy Schlaikjer): Test for OOM before it happens
      v = new DenseVector(size);
    } else {
      // more efficient to build sparse vector with this impl
      v = new RandomAccessSparseVector(size);
    }

    // populate vector
    for (Tuple entry : entries) {
      validateSparseVectorEntryData(entry);
      int i = (Integer) entry.get(0);
      // out-of-bounds entries are counted and dropped rather than failing the whole record
      if (i < 0 || i >= size) {
        counterHelper.incrCounter(Counter.INDEX_OUT_OF_BOUNDS, 1);
        continue;
      }
      double n = ((Number) entry.get(1)).doubleValue();
      // setQuick skips bounds checking; the index was range-checked just above
      v.setQuick(i, n);
    }

    // convert to (sparse) sequential vector if requested
    if (sequential) {
      v = new SequentialAccessSparseVector(v);
    }

    return v;
  }