in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/BigSamplerAvro.scala [161:185]
/**
 * Feeds one primitive Avro value into the sampling-key hasher.
 *
 * The Avro type reported by `fieldSchema` selects how `fieldValue` is hashed:
 *  - ENUM is delegated to `hashEnum`; BYTES and FIXED are delegated to `hashBytes`.
 *  - STRING is hashed as a `CharSequence` using `BigSampler.utf8Charset`.
 *  - INT is widened to `Long` before hashing; LONG, FLOAT, DOUBLE and BOOLEAN are hashed
 *    with the matching `put*` call.
 *  - NULL contributes nothing: the hasher is returned as received.
 *
 * @param fieldName   name of the field; forwarded to the helpers and used in the error message
 * @param fieldSchema Avro schema whose type decides how `fieldValue` is interpreted
 * @param fieldValue  value to hash; it is cast to the type implied by `fieldSchema`
 * @param hasher      hasher that receives the value
 * @return the hasher produced by the selected `put*` / helper call (or `hasher` itself for NULL)
 * @throws UnsupportedOperationException if the schema type is none of the types listed above
 */
private def hashPrimitive(
  fieldName: String,
  fieldSchema: Schema,
  fieldValue: AnyRef,
  hasher: Hasher
): Hasher =
  fieldSchema.getType match {
    // Nulls are ignored, so the hasher is handed back untouched.
    case Type.NULL => hasher

    // Types that have a dedicated helper.
    case Type.ENUM => hashEnum(fieldName, fieldSchema, fieldValue, hasher)
    case Type.BYTES | Type.FIXED => hashBytes(fieldName, fieldSchema, fieldValue, hasher)

    case Type.STRING =>
      val text = fieldValue.asInstanceOf[CharSequence]
      hasher.putString(text, BigSampler.utf8Charset)

    case Type.BOOLEAN =>
      val flag = fieldValue.asInstanceOf[Boolean]
      hasher.putBoolean(flag)

    // Ints are widened to long to stay consistent with BigQuery INT.
    case Type.INT =>
      val widened = fieldValue.asInstanceOf[Int].toLong
      hasher.putLong(widened)

    case Type.LONG =>
      val number = fieldValue.asInstanceOf[Long]
      hasher.putLong(number)

    case Type.FLOAT =>
      val number = fieldValue.asInstanceOf[Float]
      hasher.putFloat(number)

    case Type.DOUBLE =>
      val number = fieldValue.asInstanceOf[Double]
      hasher.putDouble(number)

    // Any remaining type cannot be used as a sampling key.
    case _ =>
      val message =
        s"Type `${fieldSchema.getType}` of `${fieldName}` is not supported as sampling key!"
      throw new UnsupportedOperationException(message)
  }