# clusterloader2/testing/load/config.yaml
# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
# - The number of created SVCs is half the number of created Deployments.
# - Only half of the Deployments will be assigned 1-1 to existing SVCs.
# Constants
# Cater for the case where the number of nodes is smaller than the number of nodes per namespace. See https://github.com/kubernetes/perf-tests/issues/887
{{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}}
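# For illustration only (hypothetical numbers, not part of the test): on a 50-node
# cluster with the default of 100, MinInt caps NODES_PER_NAMESPACE at 50, so the
# namespace division below still yields at least one namespace.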
# See https://github.com/kubernetes/perf-tests/pull/1667#issuecomment-769642266
{{$IS_SMALL_CLUSTER := lt .Nodes 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}
{{$DELETE_TEST_THROUGHPUT := DefaultParam .CL2_DELETE_TEST_THROUGHPUT $LOAD_TEST_THROUGHPUT}}
{{$RATE_LIMIT_POD_CREATION := DefaultParam .CL2_RATE_LIMIT_POD_CREATION true}}
{{$BIG_GROUP_SIZE := DefaultParam .BIG_GROUP_SIZE 250}}
{{$MEDIUM_GROUP_SIZE := DefaultParam .MEDIUM_GROUP_SIZE 30}}
{{$SMALL_GROUP_SIZE := DefaultParam .SMALL_GROUP_SIZE 5}}
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .SMALL_STATEFUL_SETS_PER_NAMESPACE 1}}
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .MEDIUM_STATEFUL_SETS_PER_NAMESPACE 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_API_AVAILABILITY_MEASUREMENT := DefaultParam .CL2_ENABLE_API_AVAILABILITY_MEASUREMENT false}}
{{$RANDOM_SCALE_FACTOR := 0.5}}
# Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
{{$podsPerNamespace := DivideInt $totalPods $namespaces}}
{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}}
{{$deletionTime := DivideInt $totalPods $DELETE_TEST_THROUGHPUT}}
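# Illustrative walk-through of the variables above, assuming a hypothetical
# 500-node cluster and all defaults (example numbers only, not requirements):
#   namespaces       = 500 / 100    = 5
#   totalPods        = 5 * 100 * 30 = 15000
#   podsPerNamespace = 15000 / 5    = 3000
#   saturationTime   = 15000 / 10   = 1500 (seconds)
#   deletionTime     = 15000 / 10   = 1500 (seconds)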
# bigDeployments - 1/4 of namespace pods should be in big Deployments.
{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
# mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
# smallDeployments - 1/2 of namespace pods should be in small Deployments.
{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
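# Continuing the illustrative example (podsPerNamespace = 3000, default group sizes):
#   bigDeploymentsPerNamespace    = 3000 / (4 * 250) = 3
#   mediumDeploymentsPerNamespace = 3000 / (4 * 30)  = 25
#   smallDeploymentsPerNamespace  = 3000 / (2 * 5)   = 300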
# Stateful sets are enabled. Reduce the number of small and medium deployments per namespace accordingly.
# See https://github.com/kubernetes/perf-tests/issues/1036#issuecomment-607631768
# Ensure the result does not go negative after the subtraction.
{{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace $SMALL_STATEFUL_SETS_PER_NAMESPACE)}}
{{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace $MEDIUM_STATEFUL_SETS_PER_NAMESPACE)}}
# Jobs are enabled. Reduce the number of small, medium, and big deployments per namespace accordingly.
# Ensure the result does not go negative after the subtraction.
{{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace 1)}}
{{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace 1)}}
{{$bigDeploymentsPerNamespace := MaxInt 0 (SubtractInt $bigDeploymentsPerNamespace 1)}}
# Disable big jobs on small clusters.
{{$bigJobsPerNamespace := IfThenElse $IS_SMALL_CLUSTER 0 1}}
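# Continuing the illustrative example, the StatefulSet and Job adjustments above give:
#   smallDeploymentsPerNamespace  = max(0, 300 - 1) = 299, then max(0, 299 - 1) = 298
#   mediumDeploymentsPerNamespace = max(0, 25 - 1)  = 24,  then max(0, 24 - 1)  = 23
#   bigDeploymentsPerNamespace    = max(0, 3 - 1)   = 2
#   bigJobsPerNamespace           = 1 (a 500-node cluster is not a small cluster)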
# The minimum number of pods used for measurements such as pod-startup-latency
# or scheduler-throughput. It guards against problems in small clusters, where
# we otherwise wouldn't have enough samples (pods) to measure things accurately.
{{$MIN_PODS_IN_SMALL_CLUSTERS := 500}}
# BEGIN scheduler-throughput section
# TODO(https://github.com/kubernetes/perf-tests/issues/1027): Lower the number of "min-pods" once we fix the scheduler throughput measurement.
{{$totalSchedulerThroughputPods := MaxInt (MultiplyInt 2 $MIN_PODS_IN_SMALL_CLUSTERS) .Nodes}}
# Determines the number of pods per deployment. Should be a divisor of $totalSchedulerThroughputPods.
{{$schedulerThroughputPodsPerDeployment := DefaultParam .CL2_SCHEDULER_THROUGHPUT_PODS_PER_DEPLOYMENT $totalSchedulerThroughputPods}}
{{$schedulerThroughputNamespaces := DivideInt $totalSchedulerThroughputPods $schedulerThroughputPodsPerDeployment}}
# Set schedulerThroughputNamespaces to 1 on small clusters; otherwise it would
# result in an unnecessarily large number of namespaces.
{{$schedulerThroughputNamespaces := IfThenElse $IS_SMALL_CLUSTER 1 $schedulerThroughputNamespaces}}
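# Continuing the illustrative 500-node example with the defaults above:
#   totalSchedulerThroughputPods         = max(2 * 500, 500) = 1000
#   schedulerThroughputPodsPerDeployment = 1000 (defaults to the total itself)
#   schedulerThroughputNamespaces        = 1000 / 1000       = 1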
# END scheduler-throughput section
# Command to be executed
{{$EXEC_COMMAND := DefaultParam .CL2_EXEC_COMMAND nil}}
{{$EXIT_AFTER_EXEC := DefaultParam .CL2_EXIT_AFTER_EXEC false}}
{{$EXEC_TIMEOUT := DefaultParam .CL2_EXEC_TIMEOUT "3600s"}}
{{$SLEEP_AFTER_EXEC_DURATION := DefaultParam .CL2_SLEEP_AFTER_EXEC_DURATION "0s"}}
{{$latencyPodImage := DefaultParam .CL2_LATENCY_POD_IMAGE "k8s.gcr.io/pause:3.1"}}
{{$defaultQps := DefaultParam .CL2_DEFAULT_QPS (IfThenElse (le .Nodes 500) 10 100)}}
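# For example, a hypothetical 500-node cluster gets the 10 qps default (500 <= 500),
# while anything larger gets 100 qps.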
name: load
namespace:
number: {{AddInt $namespaces $schedulerThroughputNamespaces}}
tuningSets:
- name: Sequence
parallelismLimitedLoad:
parallelismLimit: 1
# TODO(https://github.com/kubernetes/perf-tests/issues/1024): This TuningSet is used only for pod-startup-latency; get rid of it.
# Uniform5qps: for each running phase, use 5 qps.
- name: Uniform5qps
qpsLoad:
qps: 5
# default is a tuning set meant to be used when we don't have any specific requirements on the pace of operations.
- name: default
globalQPSLoad:
qps: {{$defaultQps}}
burst: 1
- name: RandomizedSaturationTimeLimited
RandomizedTimeLimitedLoad:
timeLimit: {{$saturationTime}}s
- name: RandomizedScalingTimeLimited
RandomizedTimeLimitedLoad:
# The expected number of created/deleted pods is totalPods/4 when scaling,
# as each RS changes its size from X to a uniform random value in [X/2, 3X/2],
# whose mean absolute deviation from X is X/4.
# To match the 10 [pods/s] requirement, we need to divide saturationTime by 4.
timeLimit: {{DivideInt $saturationTime 4}}s
- name: RandomizedDeletionTimeLimited
RandomizedTimeLimitedLoad:
timeLimit: {{$deletionTime}}s
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
nodeFailure:
failureRate: 0.01
interval: 5m
jitterFactor: 2.0
simulatedDowntime: 10m
{{end}}
steps:
- module:
path: /modules/measurements.yaml
params:
action: start
- module:
path: modules/services.yaml
params:
actionName: "Creating"
namespaces: {{$namespaces}}
smallServicesPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
mediumServicesPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
bigServicesPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}
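# The (deploymentsPerNamespace + 1) / 2 integer division rounds up, so roughly every
# second Deployment of a given size gets a matching Service, per the SVC assumptions
# at the top of this file. With the illustrative numbers used earlier, 298 small
# Deployments per namespace would map to (298 + 1) / 2 = 149 small Services.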
- name: Creating PriorityClass for DaemonSets
phases:
- replicasPerNamespace: 1
tuningSet: Sequence
objectBundle:
- basename: daemonset-priorityclass
objectTemplatePath: daemonset-priorityclass.yaml
# Moved from reconcile-objects.yaml to mitigate https://github.com/kubernetes/kubernetes/issues/96635.
# TODO(https://github.com/kubernetes/perf-tests/issues/1823): Merge back to reconcile-objects.yaml once the k/k bug is fixed.
- module:
path: /modules/configmaps-secrets.yaml
params:
actionName: create
tuningSet: default
namespaces: {{$namespaces}}
bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
- module:
path: /modules/reconcile-objects.yaml
params:
actionName: "create"
namespaces: {{$namespaces}}
{{if $RATE_LIMIT_POD_CREATION}}
tuningSet: RandomizedSaturationTimeLimited
operationTimeout: 15m
{{else}}
tuningSet: default
operationTimeout: {{AddInt $saturationTime 900}}s
{{end}}
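# With rate-limited pod creation, RandomizedSaturationTimeLimited already spreads the
# work over saturationTime, so a flat 15m timeout is enough; without it, the timeout
# itself must cover the full saturation time plus a 900s buffer (for the illustrative
# numbers above: 1500 + 900 = 2400 seconds).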
testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
# We rely on the fact that the DaemonSet uses the same image as the 'pod-startup-latency' module.
# The goal is to cache the image on all nodes before we start any latency pod,
# so that when we measure pod startup latency, the image is already present on all nodes.
# This way, the measured pod startup latency excludes (or at least limits) the impact of image pulling,
# which matches our SLO definition: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md.
daemonSetImage: {{$latencyPodImage}}
daemonSetEnv: "before update"
daemonSetReplicas: 1
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
bigJobSize: {{$BIG_GROUP_SIZE}}
bigJobsPerNamespace: {{$bigJobsPerNamespace}}
mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
mediumJobsPerNamespace: 1
smallJobSize: {{$SMALL_GROUP_SIZE}}
smallJobsPerNamespace: 1
{{if not $IS_SMALL_CLUSTER}}
# BEGIN scheduler throughput
- module:
path: modules/scheduler-throughput.yaml
params:
action: create
namespaces: {{$namespaces}}
replicasPerNamespace: 1
schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
{{end}}
- module:
path: modules/dns-k8s-hostnames.yaml
{{if $EXEC_COMMAND}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- name: Pausing APIAvailability measurement
measurements:
- Identifier: APIAvailability
Method: APIAvailability
Params:
action: pause
{{end}}
- name: Exec command
measurements:
- Identifier: ExecCommand
Method: Exec
Params:
timeout: {{$EXEC_TIMEOUT}}
command:
{{range $EXEC_COMMAND}}
- {{.}}
{{end}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- name: Unpausing APIAvailability measurement
measurements:
- Identifier: APIAvailability
Method: APIAvailability
Params:
action: unpause
{{end}}
- name: Sleep
measurements:
- Identifier: WaitAfterExec
Method: Sleep
Params:
duration: {{$SLEEP_AFTER_EXEC_DURATION}}
{{end}}
{{if not $EXIT_AFTER_EXEC}}
{{if not $IS_SMALL_CLUSTER}}
- module:
path: modules/scheduler-throughput.yaml
params:
action: delete
namespaces: {{$namespaces}}
replicasPerNamespace: 0
schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
# END scheduler throughput
{{end}}
{{if not $IS_SMALL_CLUSTER}}
# TODO(kubernetes/perf-tests/issues/1024): We shouldn't have a dedicated module for measuring pod-startup-latency.
- module:
path: modules/pod-startup-latency.yaml
params:
namespaces: {{$namespaces}}
minPodsInSmallCluster: {{$MIN_PODS_IN_SMALL_CLUSTERS}}
image: {{$latencyPodImage}}
{{end}}
- module:
path: /modules/reconcile-objects.yaml
params:
actionName: "scale and update"
namespaces: {{$namespaces}}
{{if $RATE_LIMIT_POD_CREATION}}
tuningSet: RandomizedScalingTimeLimited
operationTimeout: 15m
{{else}}
tuningSet: default
operationTimeout: {{AddInt (DivideInt $saturationTime 4) 900}}s
{{end}}
randomScaleFactor: {{$RANDOM_SCALE_FACTOR}}
testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
daemonSetImage: {{$latencyPodImage}}
daemonSetEnv: "after update"
daemonSetReplicas: 1
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
bigJobSize: {{$BIG_GROUP_SIZE}}
bigJobsPerNamespace: {{$bigJobsPerNamespace}}
mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
mediumJobsPerNamespace: 1
smallJobSize: {{$SMALL_GROUP_SIZE}}
smallJobsPerNamespace: 1
- module:
path: /modules/reconcile-objects.yaml
params:
actionName: "delete"
namespaces: {{$namespaces}}
{{if $RATE_LIMIT_POD_CREATION}}
tuningSet: RandomizedDeletionTimeLimited
operationTimeout: 15m
{{else}}
tuningSet: default
operationTimeout: {{AddInt $deletionTime 900}}s
{{end}}
testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
daemonSetReplicas: 0
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: 0
mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
mediumDeploymentsPerNamespace: 0
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: 0
smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
smallStatefulSetsPerNamespace: 0
mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
mediumStatefulSetsPerNamespace: 0
bigJobSize: {{$BIG_GROUP_SIZE}}
bigJobsPerNamespace: 0
mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
mediumJobsPerNamespace: 0
smallJobSize: {{$SMALL_GROUP_SIZE}}
smallJobsPerNamespace: 0
pvSmallStatefulSetSize: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
pvMediumStatefulSetSize: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
- module:
path: /modules/configmaps-secrets.yaml
params:
actionName: delete
tuningSet: default
namespaces: {{$namespaces}}
bigDeploymentsPerNamespace: 0
mediumDeploymentsPerNamespace: 0
smallDeploymentsPerNamespace: 0
- name: Deleting PriorityClass for DaemonSets
phases:
- replicasPerNamespace: 0
tuningSet: Sequence
objectBundle:
- basename: daemonset-priorityclass
objectTemplatePath: daemonset-priorityclass.yaml
- module:
path: modules/services.yaml
params:
actionName: "Deleting"
namespaces: {{$namespaces}}
smallServicesPerNamespace: 0
mediumServicesPerNamespace: 0
bigServicesPerNamespace: 0
{{end}} # not EXIT_AFTER_EXEC
- module:
path: /modules/measurements.yaml
params:
action: gather