in flink-optimizer/src/main/java/org/apache/flink/optimizer/costs/CostEstimator.java [76:239]
/**
 * Computes the total costs of the given plan node and stores them on the node.
 *
 * <p>The total is the sum of the costs of every input channel (ship strategy, local strategy
 * and, if a temp mode is set, an artificial dam) plus the costs of the node's driver strategy.
 * Channel costs are multiplied by the channel's cost weight when the channel is on a dynamic
 * path; the driver costs receive the node's cost weight when the node is on a dynamic path.
 *
 * @param n The node to compute the costs for. The result is set via {@code n.setCosts(...)}.
 * @throws CompilerException If an input has the ship strategy {@code NONE}, or if a ship
 *         strategy, local strategy or driver strategy is not handled by this estimator.
 */
public void costOperator(PlanNode n) {
    // initialize costs objects with no costs
    final Costs totalCosts = new Costs();
    final long availableMemory = n.getGuaranteedAvailableMemory();

    // add the shipping and local strategy costs of every input
    for (Channel channel : n.getInputs()) {
        totalCosts.addCosts(computeChannelCosts(channel));
    }

    // add the costs of the driver itself
    totalCosts.addCosts(computeDriverCosts(n, availableMemory));
    n.setCosts(totalCosts);
}

/**
 * Computes the costs caused by a single input channel: shipping, local strategy and,
 * if a temp mode other than {@code NONE} is set, an artificial dam.
 *
 * @param channel The input channel to cost.
 * @return The costs of the channel, already multiplied by the channel's cost weight if the
 *         channel is on a dynamic path.
 * @throws CompilerException If the ship strategy is {@code NONE} or unknown, or if the local
 *         strategy is not supported.
 */
private Costs computeChannelCosts(Channel channel) {
    final Costs costs = new Costs();

    // Plans that apply the same strategies, but at different points
    // are equally expensive. For example, if a partitioning can be
    // pushed below a Map function there is often no difference in plan
    // costs between the pushed down version and the version that partitions
    // after the Mapper. However, in those cases, we want the expensive
    // strategy to appear later in the plan, as data reduction often occurs
    // by large factors, while blowup is rare and typically by smaller fractions.
    // The idea is to add a small penalty to the FORWARD strategy, weighted by
    // the current plan depth (steps to the earliest data source), so that
    // later FORWARDs are more expensive than earlier ones. That penalty would
    // only apply to the heuristic costs.
    // NOTE: the penalty is currently disabled (see the FORWARD case below),
    // so FORWARD does not contribute any costs.
    switch (channel.getShipStrategy()) {
        case NONE:
            throw new CompilerException(
                "Cannot determine costs: Shipping strategy has not been set for an input.");
        case FORWARD:
            // disabled depth-weighted penalty:
            // costs.addHeuristicNetworkCost(channel.getMaxDepth());
            break;
        case PARTITION_RANDOM:
        case PARTITION_FORCED_REBALANCE:
            // both strategies are costed as a random partitioning
            addRandomPartitioningCost(channel, costs);
            break;
        case PARTITION_HASH:
        case PARTITION_CUSTOM:
            // custom partitioning is costed like hash partitioning
            addHashPartitioningCost(channel, costs);
            break;
        case PARTITION_RANGE:
            addRangePartitionCost(channel, costs);
            break;
        case BROADCAST:
            addBroadcastCost(channel, channel.getReplicationFactor(), costs);
            break;
        default:
            throw new CompilerException("Unknown shipping strategy for input: " + channel.getShipStrategy());
    }

    switch (channel.getLocalStrategy()) {
        case NONE:
            break;
        case SORT:
        case COMBININGSORT:
            addLocalSortCost(channel, costs);
            break;
        default:
            throw new CompilerException("Unsupported local strategy for input: " + channel.getLocalStrategy());
    }

    // a temp mode other than NONE adds an artificial dam on this channel
    if (channel.getTempMode() != null && channel.getTempMode() != TempMode.NONE) {
        addArtificialDamCost(channel, 0, costs);
    }

    // adjust with the cost weight factor
    if (channel.isOnDynamicPath()) {
        costs.multiplyWith(channel.getCostWeight());
    }

    return costs;
}

/**
 * Computes the costs of the node's driver strategy, based on its first two inputs.
 *
 * @param n The node whose driver strategy is costed.
 * @param availableMemory The memory value passed on to the nested-loops cost functions.
 * @return The costs of the driver strategy; empty costs for strategies that are treated as free.
 * @throws CompilerException If the driver strategy is not handled by this estimator.
 */
private Costs computeDriverCosts(PlanNode n, long availableMemory) {
    final Costs driverCosts = new Costs();

    // the cost weight only deviates from 1 for nodes on a dynamic path
    int costWeight = 1;
    if (n.isOnDynamicPath()) {
        costWeight = n.getCostWeight();
    }

    // get the inputs, if we have some; either may stay null if the node has fewer inputs
    Channel firstInput = null;
    Channel secondInput = null;
    final Iterator<Channel> channels = n.getInputs().iterator();
    if (channels.hasNext()) {
        firstInput = channels.next();
    }
    if (channels.hasNext()) {
        secondInput = channels.next();
    }

    // determine the driver costs
    switch (n.getDriverStrategy()) {
        case NONE:
        case UNARY_NO_OP:
        case BINARY_NO_OP:
        case MAP:
        case MAP_PARTITION:
        case FLAT_MAP:
        case ALL_GROUP_REDUCE:
        case ALL_REDUCE:
            // these operations do not do any actual grouping, since every element is in the same single group
        case CO_GROUP:
        case CO_GROUP_RAW:
        case SORTED_GROUP_REDUCE:
        case SORTED_REDUCE:
            // grouping or co-grouping over sorted streams is for free
        case SORTED_GROUP_COMBINE:
        case ALL_GROUP_COMBINE:
            // partial grouping is always local and main memory resident. we should add a relative cpu cost at some point
        case UNION:
            // pipelined local union is for free
            break;
        case INNER_MERGE:
        case FULL_OUTER_MERGE:
        case LEFT_OUTER_MERGE:
        case RIGHT_OUTER_MERGE:
            addLocalMergeCost(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST:
        case RIGHT_HYBRIDHASH_BUILD_FIRST:
        case LEFT_HYBRIDHASH_BUILD_FIRST:
        case FULL_OUTER_HYBRIDHASH_BUILD_FIRST:
            // the build side is passed as the first argument
            addHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND:
        case LEFT_HYBRIDHASH_BUILD_SECOND:
        case RIGHT_HYBRIDHASH_BUILD_SECOND:
        case FULL_OUTER_HYBRIDHASH_BUILD_SECOND:
            addHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_FIRST_CACHED:
            addCachedHybridHashCosts(firstInput, secondInput, driverCosts, costWeight);
            break;
        case HYBRIDHASH_BUILD_SECOND_CACHED:
            addCachedHybridHashCosts(secondInput, firstInput, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_FIRST:
            // the outer side is passed as the first argument
            addBlockNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_BLOCKED_OUTER_SECOND:
            addBlockNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_FIRST:
            addStreamedNestedLoopsCosts(firstInput, secondInput, availableMemory, driverCosts, costWeight);
            break;
        case NESTEDLOOP_STREAMED_OUTER_SECOND:
            addStreamedNestedLoopsCosts(secondInput, firstInput, availableMemory, driverCosts, costWeight);
            break;
        default:
            // this switch is over the driver strategy, not the channel's local strategy
            throw new CompilerException("Unknown driver strategy: " + n.getDriverStrategy().name());
    }

    return driverCosts;
}