in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/BigSamplerAvro.scala [126:159]
/**
 * Convenience overload that accepts the field path as a single string.
 *
 * `fieldStr` is split on `BigSampler.fieldSep` and the resulting segments are handed to the
 * list-based `hashAvroField` overload together with `schema`, `r` and `hasher`.
 *
 * @param schema   schema passed through to the list-based overload
 * @param r        record to read the field from
 * @param fieldStr field path, with segments separated by `BigSampler.fieldSep`
 * @param hasher   hasher passed through to the list-based overload
 * @return the hasher returned by the list-based overload
 */
private[samplers] def hashAvroField(
  schema: Schema
)(r: GenericRecord, fieldStr: String, hasher: Hasher): Hasher = {
  val pathSegments = fieldStr.split(BigSampler.fieldSep).toList
  hashAvroField(schema, r, pathSegments, hasher)
}
/**
 * Feeds the value found at `fieldPath` within record `r` into `hasher`.
 *
 * The field is looked up via `getField`, which yields the field's name, schema and value.
 *  - A `null` value is logged at debug level and contributes nothing to the hash.
 *  - An `ARRAY` value is expanded into its elements, each paired with the schema returned by
 *    `resolveUnion` for the array's element type.
 *  - Any value (or array element) whose schema is a `RECORD` is hashed by recursing into it with
 *    the remainder of the path (`fieldPath.tail`).
 *  - Everything else is delegated to `hashPrimitive`.
 *
 * @param schema    schema used by `getField` to resolve `fieldPath` in `r`
 * @param r         record to read the field from
 * @param fieldPath path segments leading to the field to hash
 * @param hasher    hasher that receives the field's value(s)
 * @return the hasher after all values for the field have been added
 */
private[samplers] def hashAvroField(
  schema: Schema,
  r: GenericRecord,
  fieldPath: List[String],
  hasher: Hasher
): Hasher = {
  val (fieldName, fieldSchema, fieldValue) = getField(r, fieldPath, schema)
  if (fieldValue == null) {
    log.debug(
      s"Field `${fieldName}` of type ${fieldSchema.getType} is null - won't account for hash"
    )
    hasher
  } else {
    // Normalise to a sequence of (value, schema) pairs so scalars and arrays share one code path.
    val vs = if (fieldSchema.getType == Type.ARRAY) {
      val elementType = fieldSchema.getElementType
      fieldValue.asInstanceOf[JList[AnyRef]].asScala.map(v => (v, resolveUnion(elementType, v)))
    } else {
      Seq((fieldValue, fieldSchema))
    }
    vs.foldLeft(hasher) { case (h, (v, s)) =>
      s.getType match {
        case Type.RECORD =>
          // Pass the fold accumulator `h` (not the outer `hasher`) so earlier elements'
          // contributions are carried forward regardless of whether the hasher is mutated in place.
          hashAvroField(s, v.asInstanceOf[GenericRecord], fieldPath.tail, h)
        case _ => hashPrimitive(fieldName, s, v, h)
      }
    }
  }
}