def sampleAvro[T <: GenericRecord: ClassTag: Coder]()

in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/package.scala [62:92]


  /**
   * Samples a collection of Avro records by delegating to `BigSampler.sample`.
   *
   * The schema is rendered to its string form up front and re-parsed lazily on first use,
   * so the Avro helpers built below work from the string representation rather than from
   * the `Schema` instance supplied by the caller (presumably so the captured state stays
   * serializable when shipped to workers; confirm against `BigSamplerAvro`).
   *
   * Every argument other than `schema` is forwarded unchanged to `BigSampler.sample`.
   *
   * @param coll               records to sample
   * @param fraction           forwarded as the sampling fraction
   * @param schema             Avro schema of the records; by-name, evaluated once here in
   *                           order to serialize it
   * @param fields             forwarded field names (defaults to none)
   * @param seed               optional seed, forwarded as-is
   * @param hashAlgorithm      hash algorithm to use (defaults to `FarmHash`)
   * @param distribution       optional target distribution, forwarded as-is
   * @param distributionFields field names forwarded to the sampler and also used to build
   *                           the Avro key function
   * @param precision          sampling precision (defaults to `Approximate`)
   * @param maxKeySize         forwarded key-size limit (defaults to `1e6.toInt`)
   * @param byteEncoding       byte encoding to use (defaults to `RawEncoding`)
   * @return the collection produced by `BigSampler.sample`
   */
  def sampleAvro[T <: GenericRecord: ClassTag: Coder](
    coll: SCollection[T],
    fraction: Double,
    schema: => Schema,
    fields: Seq[String] = Seq(),
    seed: Option[Int] = None,
    hashAlgorithm: HashAlgorithm = FarmHash,
    distribution: Option[SampleDistribution] = None,
    distributionFields: Seq[String] = Seq(),
    precision: Precision = Approximate,
    maxKeySize: Int = 1e6.toInt,
    byteEncoding: ByteEncoding = RawEncoding
  ): SCollection[T] = {
    // Capture the schema as a plain string; the parsed form is rebuilt on demand.
    val schemaJson = schema.toString(false)
    @transient lazy val parsedSchema = new Schema.Parser().parse(schemaJson)

    BigSampler.sample(
      coll, fraction, fields, seed, hashAlgorithm,
      distribution, distributionFields, precision,
      // Avro-specific hashing and key extraction, both driven by the re-parsed schema.
      BigSamplerAvro.hashAvroField(parsedSchema),
      BigSamplerAvro.buildKey(parsedSchema, distributionFields),
      maxKeySize, byteEncoding
    )
  }