def sampleTableRow()

in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/package.scala [119:150]


  /**
   * Samples a collection of `TableRow`s by delegating to `BigSampler.sample`.
   *
   * The supplied schema is rendered to a JSON string up front; its field list is then rebuilt
   * from that string lazily, on first access, and handed to the `BigSamplerBigQuery` hashing and
   * key-building functions.
   *
   * NOTE(review): the JSON round trip plus `@transient lazy val` presumably keeps the
   * `TableSchema` object itself out of the closures that get serialized — confirm before
   * simplifying.
   *
   * All sampling parameters are forwarded to `BigSampler.sample` unchanged; see that method for
   * their exact semantics.
   *
   * @param coll rows to sample
   * @param fraction forwarded as the sampling fraction
   * @param schema schema describing the rows in `coll`
   * @param fields forwarded as the field list (defaults to empty)
   * @param seed forwarded as the optional seed
   * @param hashAlgorithm forwarded as the hash algorithm (defaults to `FarmHash`)
   * @param distribution forwarded as the optional sample distribution
   * @param distributionFields forwarded, and also used when building the key function
   * @param precision forwarded as the precision setting (defaults to `Approximate`)
   * @param maxKeySize forwarded as the maximum key size (defaults to `1e6.toInt`)
   * @param byteEncoding forwarded as the byte encoding (defaults to `RawEncoding`)
   * @return the collection produced by `BigSampler.sample`
   */
  def sampleTableRow(
    coll: SCollection[TableRow],
    fraction: Double,
    schema: TableSchema,
    fields: Seq[String] = Seq(),
    seed: Option[Int] = None,
    hashAlgorithm: HashAlgorithm = FarmHash,
    distribution: Option[SampleDistribution] = None,
    distributionFields: Seq[String] = Seq(),
    precision: Precision = Approximate,
    maxKeySize: Int = 1e6.toInt,
    byteEncoding: ByteEncoding = RawEncoding
  ): SCollection[TableRow] = {
    // Eagerly capture the schema as plain JSON text.
    val schemaJson = JsonSerDe.toJsonString(schema)

    // Field list is re-parsed from the JSON text only when first needed.
    @transient lazy val parsedFields = {
      val restoredSchema = JsonSerDe.fromJsonString(schemaJson, classOf[TableSchema])
      restoredSchema.getFields.asScala.toList
    }

    BigSampler.sample(
      coll, fraction, fields, seed,
      hashAlgorithm, distribution, distributionFields, precision,
      BigSamplerBigQuery.hashTableRow(parsedFields),
      BigSamplerBigQuery.buildKey(parsedFields, distributionFields),
      maxKeySize, byteEncoding
    )
  }