private[samplers] def hashTableRow()

in ratatool-sampling/src/main/scala/com/spotify/ratatool/samplers/BigSamplerBigQuery.scala [42:93]


  /**
   * Feeds the value(s) of the field addressed by `fieldStr` in row `r` into `hasher`.
   *
   * `fieldStr` is split on `BigSampler.fieldSep`; the first segment is looked up by name in
   * `tblSchema` and in the row. For a `RECORD` field the remaining segments are re-joined and
   * the method recurses into the nested record using the field's own sub-schema.
   *
   * A null value leaves the hasher untouched (only a debug message is logged). A field whose
   * mode is `REPEATED` is treated as a Java list and every element is hashed in order.
   *
   * @param tblSchema schema fields for the level of the row being inspected (by-name)
   * @param r         the row to read the value from
   * @param fieldStr  field path, nested segments separated by `BigSampler.fieldSep`
   * @param hasher    hasher to feed
   * @return the hasher after the field's value(s) have been put into it
   * @throws NoSuchElementException        if the first path segment is not in `tblSchema`
   * @throws UnsupportedOperationException if the field's type is not one of the handled types
   */
  private[samplers] def hashTableRow(
    tblSchema: => Seq[TableFieldSchema]
  )(r: TableRow, fieldStr: String, hasher: Hasher): Hasher = {
    val path = fieldStr.split(BigSampler.fieldSep)
    val column = tblSchema
      .find(_.getName == path.head)
      .getOrElse(
        throw new NoSuchElementException(s"Can't find field `$fieldStr` in the schema $tblSchema")
      )

    Option(r.get(path.head)) match {
      case None =>
        log.debug(
          s"Field `${column.getName}` of type ${column.getType} and mode ${column.getMode}" +
            s" is null - won't account for hash"
        )
        hasher

      case Some(raw) =>
        // Normalise to a collection so single and repeated values share one code path.
        val values =
          if (column.getMode == "REPEATED") raw.asInstanceOf[JList[AnyRef]].asScala
          else Seq(raw)

        // Threads the hasher through every value, starting from the caller's hasher.
        def hashEach(step: (Hasher, AnyRef) => Hasher): Hasher =
          values.foldLeft(hasher)(step)

        column.getType match {
          case "BOOLEAN" =>
            hashEach((acc, elem) => acc.putBoolean(elem.toString.toBoolean))
          case "INTEGER" =>
            hashEach((acc, elem) => acc.putLong(elem.toString.toLong))
          case "FLOAT" =>
            hashEach((acc, elem) => acc.putFloat(elem.toString.toFloat))
          // All of these are hashed through their string representation.
          case "STRING" | "TIMESTAMP" | "DATE" | "TIME" | "DATETIME" =>
            hashEach((acc, elem) => acc.putString(elem.toString, BigSampler.utf8Charset))
          case "BYTES" =>
            hashEach((acc, elem) => acc.putBytes(elem.asInstanceOf[Array[Byte]]))
          case "RECORD" =>
            // Remaining path segments address the field inside the nested record.
            val nestedPath = path.tail.mkString(BigSampler.fieldSep.toString)
            hashEach { (acc, elem) =>
              hashTableRow(column.getFields.asScala.toList)(
                TableRow(elem.asInstanceOf[java.util.Map[String, Any]].asScala.toList: _*),
                nestedPath,
                acc
              )
            }
          case unsupported =>
            throw new UnsupportedOperationException(
              s"Type `$unsupported` of field `${column.getName}` is not supported as sampling key"
            )
        }
    }
  }