in src/main/scala/com/spotify/bdrc/pipeline/BloomFilterSetDifference.scala [41:48]
/**
 * Approximate set difference: keeps the elements of `lhs` that are not found in `rhs`.
 *
 * All of `rhs` is aggregated into a single Bloom filter, which is crossed with every
 * element of `lhs`; an element is emitted only when the filter does not report it as
 * present. Because membership is tested through a Bloom filter, the result is
 * approximate: per the Algebird Bloom filter contract, an element of `lhs` that is
 * absent from `rhs` may occasionally be reported as present (false positive) and
 * therefore dropped.
 *
 * @param lhs collection to filter
 * @param rhs collection of items to exclude from `lhs`
 * @return the elements of `lhs` that the Bloom filter built from `rhs` does not contain
 */
def scio(lhs: SCollection[String], rhs: SCollection[String]): SCollection[String] = {
  // Filter sizing is hard-coded: 1000 expected entries at a 0.01 false-positive probability.
  // NOTE(review): assumes `rhs` holds roughly 1000 distinct items; a much larger `rhs`
  // would raise the false-positive rate. Confirm against callers.
  val width = BloomFilter
    .optimalWidth(1000, 0.01)
    .getOrElse(sys.error("Cannot compute Bloom filter width for 1000 entries at 0.01 FP rate"))
  val numHashes = BloomFilter.optimalNumHashes(1000, width)

  // Collapse the whole of `rhs` into one Bloom filter value.
  val rhsFilter = rhs.aggregate(BloomFilterAggregator[String](numHashes, width))

  lhs
    .cross(rhsFilter)
    // Set difference: keep only the elements NOT reported as present in `rhs`.
    .filter { case (s, bf) => !bf.contains(s).isTrue }
    .keys
}