private def diff()

in ratatool-diffy/src/main/scala/com/spotify/ratatool/diffy/AvroDiffy.scala [71:183]


  /**
   * Recursively compares two Avro values described by `schema` and collects their differences.
   *
   * @param x      left-hand value; may be `null` when `schema` is a union
   * @param y      right-hand value; may be `null` when `schema` is a union
   * @param schema Avro schema describing both `x` and `y`
   * @param field  dotted path of the value being compared; when empty, record field names are
   *               used as paths without a prefix
   * @return the deltas found under `field`, minus any whose path is listed in `ignore`
   */
  private def diff(x: AnyRef, y: AnyRef, schema: Schema, field: String): Seq[Delta] = {
    // Re-roots a nested delta from `field` onto `elementField` (`field` plus a `[key]` suffix).
    // Every delta produced by a recursive call on `field` has a path starting with `field`, so a
    // literal prefix swap is sufficient. `String.replaceFirst` must not be used here: it treats
    // `field` as a regex and `elementField` as a replacement template, so paths or keys that
    // contain `[`, `]`, `$` or `\` are either left un-keyed or make the call throw.
    def rekey(delta: Delta, elementField: String): Delta =
      delta.copy(field = elementField + delta.field.stripPrefix(field))

    val deltas = schema.getType match {
      case Schema.Type.UNION =>
        // union, must resolve to same type
        val data = SpecificData.get()
        val xTypeIndex = data.resolveUnion(schema, x)
        val yTypeIndex = data.resolveUnion(schema, y)
        if (xTypeIndex != yTypeIndex) {
          // Use Option as x or y can be null
          Seq(Delta(field, Option(x), Option(y), UnknownDelta))
        } else {
          // same branch on both sides, recurse with the refined schema
          val fieldSchema = schema.getTypes.get(xTypeIndex)
          diff(x, y, fieldSchema, field)
        }

      case Schema.Type.RECORD =>
        // record, compare all fields
        val a = x.asInstanceOf[IndexedRecord]
        val b = y.asInstanceOf[IndexedRecord]
        for {
          f <- schema.getFields.asScala.toSeq
          pos = f.pos()
          name = f.name()
          fullName = if (field.isEmpty) name else field + "." + name
          delta <- diff(a.get(pos), b.get(pos), f.schema(), fullName)
        } yield delta

      case Schema.Type.ARRAY
          if unorderedFieldKeys.contains(field) && isRecord(schema.getElementType) =>
        // keyed array, compare like a map from key-field value to record
        val keyField = unorderedFieldKeys(field)
        val as =
          x.asInstanceOf[java.util.List[GenericRecord]].asScala.map(r => r.get(keyField) -> r).toMap
        val bs =
          y.asInstanceOf[java.util.List[GenericRecord]].asScala.map(r => r.get(keyField) -> r).toMap

        for {
          k <- (as.keySet ++ bs.keySet).toSeq
          elementField = field + s"[$k]"
          delta <- (as.get(k), bs.get(k)) match {
            case (Some(a), Some(b)) => diff(a, b, schema.getElementType, field)
            // key present on one side only
            case (a, b) => Seq(Delta(field, a, b, UnknownDelta))
          }
        } yield rekey(delta, elementField)

      case Schema.Type.ARRAY =>
        // array, (un)ordered comparison
        val xs = x.asInstanceOf[java.util.List[AnyRef]]
        val ys = y.asInstanceOf[java.util.List[AnyRef]]
        val (as, bs) = if (unordered.contains(field)) {
          // unordered field: sort both sides first so element order does not matter
          (sortList(xs).asScala, sortList(ys).asScala)
        } else {
          // ordered field: compare position by position as given
          (xs.asScala, ys.asScala)
        }

        val delta = if (as.size != bs.size) {
          Some(UnknownDelta)
        } else if (isNumericType(schema.getElementType.getType) && as != bs) {
          Some(VectorDelta(vectorDelta(as.map(numericValue).toSeq, bs.map(numericValue).toSeq)))
        } else if (as != bs) {
          // Object equality may fail for elements that are still equivalent under `diff`
          // (e.g. different CharSequence implementations), so confirm with a recursive diff.
          as.zip(bs)
            .find { case (a, b) =>
              a != b && diff(a, b, schema.getElementType, field).nonEmpty
            }
            .map(_ => UnknownDelta)
        } else {
          None
        }
        delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq

      case Schema.Type.MAP =>
        // map, compare key set and values
        val as = x.asInstanceOf[java.util.Map[CharSequence, AnyRef]].asScala.map { case (k, v) =>
          k.toString -> v
        }
        val bs = y.asInstanceOf[java.util.Map[CharSequence, AnyRef]].asScala.map { case (k, v) =>
          k.toString -> v
        }

        for {
          k <- (as.keySet ++ bs.keySet).toSeq
          elementField = field + s"[$k]"
          delta <- (as.get(k), bs.get(k)) match {
            case (Some(a), Some(b)) => diff(a, b, schema.getValueType, field)
            // key present on one side only
            case (a, b) => Seq(Delta(field, a, b, UnknownDelta))
          }
        } yield rekey(delta, elementField)

      case Schema.Type.STRING =>
        // string, convert to java String for equality check
        val a = x.asInstanceOf[CharSequence].toString
        val b = y.asInstanceOf[CharSequence].toString
        val delta = if (a == b) None else Some(StringDelta(stringDelta(a, b)))
        delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq

      case t if isNumericType(t) =>
        // numeric, normalise through numericValue for the equality check
        val a = numericValue(x)
        val b = numericValue(y)
        val delta = if (a == b) None else Some(NumericDelta(numericDelta(a, b)))
        delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq

      case _ =>
        // other case rely on object equality
        val delta = if (x == y) None else Some(UnknownDelta)
        delta.map(d => Delta(field, Some(x), Some(y), d)).toSeq
    }

    deltas.filterNot(d => ignore.contains(d.field))
  }