in ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/AvroDiffy.scala [71:183]
/**
 * Recursively compares two Avro values against `schema` and collects their differences.
 *
 * @param x
 *   left-hand value; may be `null` when `schema` is a union
 * @param y
 *   right-hand value; may be `null` when `schema` is a union
 * @param schema
 *   Avro schema describing both `x` and `y`
 * @param field
 *   dotted path of the value being compared; empty for the root record
 * @return
 *   the deltas found under `field`, excluding any whose field name is listed in `ignore`
 */
private def diff(x: AnyRef, y: AnyRef, schema: Schema, field: String): Seq[Delta] = {
  // Moves a delta reported under `field` so that it is reported under `elementField`.
  // This is a literal prefix swap: `String.replaceFirst` must not be used here because it
  // interprets `field` as a regex (paths contain `.`, `[`, `]`) and `elementField` as a
  // replacement template (keys may contain `$` or `\`).
  def atElement(delta: Delta, elementField: String): Delta =
    if (delta.field.startsWith(field)) {
      delta.copy(field = elementField + delta.field.substring(field.length))
    } else {
      delta
    }

  val deltas = schema.getType match {
    case Schema.Type.UNION =>
      // union, must resolve to same type
      val data = SpecificData.get()
      val xTypeIndex = data.resolveUnion(schema, x)
      val yTypeIndex = data.resolveUnion(schema, y)
      if (xTypeIndex != yTypeIndex) {
        // Use Option as x or y can be null
        Seq(Delta(field, Option(x), Option(y), UnknownDelta))
      } else {
        // same branch on both sides, recurse with the refined schema
        val fieldSchema = schema.getTypes.get(xTypeIndex)
        diff(x, y, fieldSchema, field)
      }
    case Schema.Type.RECORD =>
      // record, compare all fields
      val a = x.asInstanceOf[IndexedRecord]
      val b = y.asInstanceOf[IndexedRecord]
      for {
        f <- schema.getFields.asScala.toSeq
        pos = f.pos()
        name = f.name()
        // the root record is diffed with an empty path, so avoid a leading "."
        fullName = if (field.isEmpty) name else field + "." + name
        delta <- diff(a.get(pos), b.get(pos), f.schema(), fullName)
      } yield delta
    case Schema.Type.ARRAY
        if unorderedFieldKeys.contains(field) && isRecord(schema.getElementType) =>
      // keyed array, compare like Map[String, Record]
      // NOTE: `.toMap` keeps only the last record for a duplicated key
      val keyField = unorderedFieldKeys(field)
      val as =
        x.asInstanceOf[java.util.List[GenericRecord]].asScala.map(r => r.get(keyField) -> r).toMap
      val bs =
        y.asInstanceOf[java.util.List[GenericRecord]].asScala.map(r => r.get(keyField) -> r).toMap
      for {
        k <- (as.keySet ++ bs.keySet).toSeq
        elementField = field + s"[$k]"
        delta <- (as.get(k), bs.get(k)) match {
          case (Some(a), Some(b)) => diff(a, b, schema.getElementType, field)
          // key present on one side only
          case (a, b) => Seq(Delta(field, a, b, UnknownDelta))
        }
      } yield atElement(delta, elementField)
    case Schema.Type.ARRAY =>
      // array, (un)ordered comparison
      val xs = x.asInstanceOf[java.util.List[AnyRef]]
      val ys = y.asInstanceOf[java.util.List[AnyRef]]
      val (as, bs) = if (unordered.contains(field)) {
        // unordered comparison: both sides go through sortList before being compared
        (sortList(xs).asScala, sortList(ys).asScala)
      } else {
        // ordered comparison: elements are compared position by position
        (xs.asScala, ys.asScala)
      }
      // at most one delta is reported for the whole array, carrying the full x and y
      val delta = if (as.size != bs.size) {
        Some(UnknownDelta)
      } else if (isNumericType(schema.getElementType.getType) && as != bs) {
        Some(VectorDelta(vectorDelta(as.map(numericValue).toSeq, bs.map(numericValue).toSeq)))
      } else if (as != bs) {
        // unequal elements only count if the recursive diff still reports something
        // once `ignore` has been applied
        as.zip(bs)
          .find { case (a, b) =>
            a != b && diff(a, b, schema.getElementType, field).nonEmpty
          }
          .map(_ => UnknownDelta)
      } else {
        None
      }
      delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq
    case Schema.Type.MAP =>
      // map, compare key set and values
      val as = x.asInstanceOf[java.util.Map[CharSequence, AnyRef]].asScala.map { case (k, v) =>
        k.toString -> v
      }
      val bs = y.asInstanceOf[java.util.Map[CharSequence, AnyRef]].asScala.map { case (k, v) =>
        k.toString -> v
      }
      for {
        k <- (as.keySet ++ bs.keySet).toSeq
        elementField = field + s"[$k]"
        delta <- (as.get(k), bs.get(k)) match {
          case (Some(a), Some(b)) => diff(a, b, schema.getValueType, field)
          // key present on one side only
          case (a, b) => Seq(Delta(field, a, b, UnknownDelta))
        }
      } yield atElement(delta, elementField)
    case Schema.Type.STRING =>
      // string, convert to java String for equality check
      val a = x.asInstanceOf[CharSequence].toString
      val b = y.asInstanceOf[CharSequence].toString
      val delta = if (a == b) None else Some(StringDelta(stringDelta(a, b)))
      delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq
    case t if isNumericType(t) =>
      // numeric, compare the values produced by numericValue
      val a = numericValue(x)
      val b = numericValue(y)
      val delta = if (a == b) None else Some(NumericDelta(numericDelta(a, b)))
      delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq
    case _ =>
      // other case rely on object equality
      val delta = if (x == y) None else Some(UnknownDelta)
      delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq
  }
  // drop deltas for fields the caller asked to ignore
  deltas.filterNot(d => ignore.contains(d.field))
}