in dbeam-core/src/main/java/com/spotify/dbeam/args/ParallelQueryBuilder.java [145:174]
/**
 * Splits the interval from {@code min} to {@code max} into at most {@code parallelism}
 * consecutive ranges of (almost) equal size.
 *
 * <p>Every range but the last one is created with the flag {@code true}, meaning its lower
 * bound is included and its upper bound is excluded. The last range always ends at {@code max}
 * and is created with the flag {@code false}, so that the upper bound is included as well.
 *
 * <p>When {@code max - min} is smaller than {@code parallelism}, fewer ranges are generated
 * (each one spanning a single value). When {@code max <= min}, exactly one range
 * {@code (min, max)} is returned.
 *
 * @param min lower bound of the interval to split
 * @param max upper bound of the interval to split
 * @param parallelism maximum number of ranges to generate; must be positive
 * @return the generated ranges, ordered from {@code min} to {@code max}; never empty and never
 *     larger than {@code parallelism}
 * @throws IllegalArgumentException if {@code parallelism} is negative
 * @throws IllegalStateException if {@code parallelism} is zero
 * @throws ArithmeticException if {@code max - min} does not fit in a {@code long}
 */
protected static List<QueryRange> generateRanges(
    final long min, final long max, final int parallelism) {
  // A negative parallelism is rejected right here by the ArrayList constructor
  // (IllegalArgumentException); zero is rejected by the explicit check below.
  final List<QueryRange> ranges = new ArrayList<>(parallelism);
  checkState(parallelism > 0, "Parallelism must be a positive number.");

  // Fail fast instead of silently wrapping around when the span does not fit in a long.
  final long span = Math.subtractExact(max, min);

  // We try not to generate more ranges than parallelism, hence the bucket size is rounded up.
  // The ceiling is computed with integer arithmetic only: going through double would lose
  // precision for spans above 2^53 and could yield a bucket that is too small, i.e. too many
  // ranges. When the span is not positive, a bucket of 1 results in a single range.
  long bucketSize = 1;
  if (span > 0) {
    bucketSize = span / parallelism + (span % parallelism == 0 ? 0 : 1);
  }

  long lower = min;
  // Equivalent to "lower + bucketSize < max", but written as a subtraction so that it cannot
  // overflow when max is close to Long.MAX_VALUE.
  while (max - lower > bucketSize) {
    final long upper = lower + bucketSize;
    // Include lower bound and exclude the upper bound.
    ranges.add(new QueryRange(lower, upper, true));
    lower = upper;
  }
  // Add last query: the remaining bucket reaches (or exceeds) max, so we must use max and the
  // predicate should include the upper bound.
  ranges.add(new QueryRange(lower, max, false));

  // If parallelism is higher than max - min, this will generate fewer ranges.
  // But let's never generate more ranges.
  checkState(
      ranges.size() <= parallelism,
      "Unable to generate expected number of ranges for given min max.");
  return ranges;
}