in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/package.scala [62:92]
/**
 * Samples an `SCollection` of Avro records by delegating to `BigSampler.sample`, supplying the
 * Avro-specific field-hashing and key-building functions from `BigSamplerAvro`.
 *
 * The schema is rendered once to its compact (non-pretty) string form; a copy parsed lazily from
 * that string is what gets handed to the `BigSamplerAvro` helpers.
 * NOTE(review): presumably this keeps a live `Schema` instance out of whatever is shipped to
 * workers -- confirm against `BigSamplerAvro` and `BigSampler.sample`.
 *
 * @tparam T record type; must be a `GenericRecord` with a `ClassTag` and a `Coder` available
 * @param coll records to sample from
 * @param fraction sampling fraction, forwarded unchanged to `BigSampler.sample`
 * @param schema Avro schema of the records; by-name, referenced exactly once here to serialize it
 * @param fields field names forwarded to `BigSampler.sample` (defaults to empty)
 * @param seed optional seed forwarded to `BigSampler.sample` (defaults to `None`)
 * @param hashAlgorithm hash algorithm forwarded to `BigSampler.sample` (defaults to `FarmHash`)
 * @param distribution optional target distribution forwarded to `BigSampler.sample`
 * @param distributionFields field names forwarded to `BigSampler.sample` and also used by
 *                           `BigSamplerAvro.buildKey`
 * @param precision precision mode forwarded to `BigSampler.sample` (defaults to `Approximate`)
 * @param maxKeySize forwarded to `BigSampler.sample` (defaults to `1e6.toInt`)
 * @param byteEncoding byte encoding forwarded to `BigSampler.sample` (defaults to `RawEncoding`)
 * @return whatever `BigSampler.sample` produces for the given arguments
 */
def sampleAvro[T <: GenericRecord: ClassTag: Coder](
  coll: SCollection[T],
  fraction: Double,
  schema: => Schema,
  fields: Seq[String] = Seq(),
  seed: Option[Int] = None,
  hashAlgorithm: HashAlgorithm = FarmHash,
  distribution: Option[SampleDistribution] = None,
  distributionFields: Seq[String] = Seq(),
  precision: Precision = Approximate,
  maxKeySize: Int = 1e6.toInt,
  byteEncoding: ByteEncoding = RawEncoding
): SCollection[T] = {
  // Compact JSON text of the schema; `false` disables pretty-printing.
  val schemaJson = schema.toString(false)
  // Parsed from the text on first use rather than reusing the caller's Schema instance.
  @transient lazy val reparsedSchema = new Schema.Parser().parse(schemaJson)

  // The two BigSamplerAvro calls are deliberately kept inline as arguments so their
  // evaluation happens exactly where BigSampler.sample's signature dictates.
  BigSampler.sample(
    coll,
    fraction,
    fields,
    seed,
    hashAlgorithm,
    distribution,
    distributionFields,
    precision,
    BigSamplerAvro.hashAvroField(reparsedSchema),
    BigSamplerAvro.buildKey(reparsedSchema, distributionFields),
    maxKeySize,
    byteEncoding
  )
}