# clusterloader2/testing/load/config.yaml

# ASSUMPTIONS:
# - Underlying cluster should have 100+ nodes.
# - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
# - The number of created SVCs is half the number of created Deployments.
# - Only half of the Deployments will be assigned 1-1 to existing SVCs.

# Constants
# Cater for the case where the number of nodes is less than nodes per namespace. See https://github.com/kubernetes/perf-tests/issues/887
{{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}}
# See https://github.com/kubernetes/perf-tests/pull/1667#issuecomment-769642266
{{$IS_SMALL_CLUSTER := lt .Nodes 100}}
{{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
{{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}
{{$DELETE_TEST_THROUGHPUT := DefaultParam .CL2_DELETE_TEST_THROUGHPUT $LOAD_TEST_THROUGHPUT}}
{{$RATE_LIMIT_POD_CREATION := DefaultParam .CL2_RATE_LIMIT_POD_CREATION true}}
{{$BIG_GROUP_SIZE := DefaultParam .BIG_GROUP_SIZE 250}}
{{$MEDIUM_GROUP_SIZE := DefaultParam .MEDIUM_GROUP_SIZE 30}}
{{$SMALL_GROUP_SIZE := DefaultParam .SMALL_GROUP_SIZE 5}}
{{$SMALL_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .SMALL_STATEFUL_SETS_PER_NAMESPACE 1}}
{{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .MEDIUM_STATEFUL_SETS_PER_NAMESPACE 1}}
{{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
{{$ENABLE_API_AVAILABILITY_MEASUREMENT := DefaultParam .CL2_ENABLE_API_AVAILABILITY_MEASUREMENT false}}
{{$RANDOM_SCALE_FACTOR := 0.5}}

# Variables
{{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
{{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}}
{{$podsPerNamespace := DivideInt $totalPods $namespaces}}
{{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}}
{{$deletionTime := DivideInt $totalPods $DELETE_TEST_THROUGHPUT}}
# bigDeployments - 1/4 of namespace pods should be in big Deployments.
{{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
# mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
{{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
# smallDeployments - 1/2 of namespace pods should be in small Deployments.
{{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
# Stateful sets are enabled. Reduce the number of small and medium deployments per namespace.
# See https://github.com/kubernetes/perf-tests/issues/1036#issuecomment-607631768
# Ensure the result does not go below zero after the subtraction.
{{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace $SMALL_STATEFUL_SETS_PER_NAMESPACE)}}
{{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace $MEDIUM_STATEFUL_SETS_PER_NAMESPACE)}}
# Jobs are enabled. Reduce the number of small, medium, and big deployments per namespace.
# Ensure the result does not go below zero after the subtraction.
{{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace 1)}}
{{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace 1)}}
{{$bigDeploymentsPerNamespace := MaxInt 0 (SubtractInt $bigDeploymentsPerNamespace 1)}}
# Disable big jobs on small clusters.
{{$bigJobsPerNamespace := IfThenElse $IS_SMALL_CLUSTER 0 1}}
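# Illustrative values only (hypothetical 5,000-node cluster, all defaults above; not part of the test logic):
# namespaces = 5000/100 = 50, totalPods = 50 * 100 * 30 = 150,000, podsPerNamespace = 3,000;
# big/medium/small Deployments per namespace start at 3 / 25 / 300 and, after reserving one small and
# one medium StatefulSet plus one Job of each size, end up as 2 / 23 / 298.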
# The minimal number of pods to be used to measure various things like
# pod-startup-latency or scheduler-throughput. Its purpose is to avoid
# problems in small clusters where we wouldn't have enough samples (pods) to
# measure things accurately.
{{$MIN_PODS_IN_SMALL_CLUSTERS := 500}}

# BEGIN scheduler-throughput section
# TODO(https://github.com/kubernetes/perf-tests/issues/1027): Lower the number of "min-pods" once we fix the scheduler throughput measurement.
{{$totalSchedulerThroughputPods := MaxInt (MultiplyInt 2 $MIN_PODS_IN_SMALL_CLUSTERS) .Nodes}}
# Determines the number of pods per deployment. Should be a divisor of $totalSchedulerThroughputPods.
{{$schedulerThroughputPodsPerDeployment := DefaultParam .CL2_SCHEDULER_THROUGHPUT_PODS_PER_DEPLOYMENT $totalSchedulerThroughputPods}}
{{$schedulerThroughputNamespaces := DivideInt $totalSchedulerThroughputPods $schedulerThroughputPodsPerDeployment}}
# Set schedulerThroughputNamespaces to 1 on small clusters, otherwise it would result
# in an unnecessarily large number of namespaces.
{{$schedulerThroughputNamespaces := IfThenElse $IS_SMALL_CLUSTER 1 $schedulerThroughputNamespaces}}
# END scheduler-throughput section

# Command to be executed
{{$EXEC_COMMAND := DefaultParam .CL2_EXEC_COMMAND nil}}
{{$EXIT_AFTER_EXEC := DefaultParam .CL2_EXIT_AFTER_EXEC false}}
{{$EXEC_TIMEOUT := DefaultParam .CL2_EXEC_TIMEOUT "3600s"}}
{{$SLEEP_AFTER_EXEC_DURATION := DefaultParam .CL2_SLEEP_AFTER_EXEC_DURATION "0s"}}

{{$latencyPodImage := DefaultParam .CL2_LATENCY_POD_IMAGE "k8s.gcr.io/pause:3.1"}}
{{$defaultQps := DefaultParam .CL2_DEFAULT_QPS (IfThenElse (le .Nodes 500) 10 100)}}

name: load
namespace:
  number: {{AddInt $namespaces $schedulerThroughputNamespaces}}
tuningSets:
- name: Sequence
  parallelismLimitedLoad:
    parallelismLimit: 1
# TODO(https://github.com/kubernetes/perf-tests/issues/1024): This TuningSet is used only for pod-startup-latency; get rid of it.
# Uniform5qps: for each running phase, use 5 qps.
- name: Uniform5qps
  qpsLoad:
    qps: 5
# default is a TuningSet that is meant to be used when we don't have any specific requirements on the pace of operations.
- name: default
  globalQPSLoad:
    qps: {{$defaultQps}}
    burst: 1
- name: RandomizedSaturationTimeLimited
  RandomizedTimeLimitedLoad:
    timeLimit: {{$saturationTime}}s
- name: RandomizedScalingTimeLimited
  RandomizedTimeLimitedLoad:
    # The expected number of created/deleted pods is totalPods/4 when scaling,
    # as each RS changes its size from X to a uniform random value in [X/2, 3X/2].
    # To match the 10 [pods/s] requirement, we need to divide saturationTime by 4.
    timeLimit: {{DivideInt $saturationTime 4}}s
- name: RandomizedDeletionTimeLimited
  RandomizedTimeLimitedLoad:
    timeLimit: {{$deletionTime}}s
{{if $ENABLE_CHAOSMONKEY}}
chaosMonkey:
  nodeFailure:
    failureRate: 0.01
    interval: 5m
    jitterFactor: 2.0
    simulatedDowntime: 10m
{{end}}
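# Illustrative values only, continuing the hypothetical 5,000-node example above: saturationTime and
# deletionTime are both 150,000 / 10 = 15,000s, so RandomizedSaturationTimeLimited and
# RandomizedDeletionTimeLimited run with a 15,000s time limit and RandomizedScalingTimeLimited with
# 15,000 / 4 = 3,750s; the default TuningSet would use qps = 100, since the cluster has more than 500 nodes.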
steps:
- module:
    path: /modules/measurements.yaml
    params:
      action: start

- module:
    path: modules/services.yaml
    params:
      actionName: "Creating"
      namespaces: {{$namespaces}}
      smallServicesPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
      mediumServicesPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
      bigServicesPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}

- name: Creating PriorityClass for DaemonSets
  phases:
  - replicasPerNamespace: 1
    tuningSet: Sequence
    objectBundle:
    - basename: daemonset-priorityclass
      objectTemplatePath: daemonset-priorityclass.yaml

# Moved from reconcile-objects.yaml to mitigate https://github.com/kubernetes/kubernetes/issues/96635.
# TODO(https://github.com/kubernetes/perf-tests/issues/1823): Merge back into reconcile-objects.yaml once the k/k bug is fixed.
- module:
    path: /modules/configmaps-secrets.yaml
    params:
      actionName: create
      tuningSet: default
      namespaces: {{$namespaces}}
      bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
      mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
      smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}

- module:
    path: /modules/reconcile-objects.yaml
    params:
      actionName: "create"
      namespaces: {{$namespaces}}
      {{if $RATE_LIMIT_POD_CREATION}}
      tuningSet: RandomizedSaturationTimeLimited
      operationTimeout: 15m
      {{else}}
      tuningSet: default
      operationTimeout: {{AddInt $saturationTime 900}}s
      {{end}}
      testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
      # We rely on the fact that the DaemonSet uses the same image as the 'pod-startup-latency' module.
      # The goal is to cache the image on all nodes before we start any latency pod,
      # so that when we measure pod startup latency, the image is already present on all nodes.
      # This way, the pod startup latency we measure excludes (or limits the impact of) image pulling,
      # which matches our SLO definition: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md.
      daemonSetImage: {{$latencyPodImage}}
      daemonSetEnv: "before update"
      daemonSetReplicas: 1
      bigDeploymentSize: {{$BIG_GROUP_SIZE}}
      bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
      mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
      mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
      smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
      smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
      smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
      smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
      mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
      mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
      bigJobSize: {{$BIG_GROUP_SIZE}}
      bigJobsPerNamespace: {{$bigJobsPerNamespace}}
      mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
      mediumJobsPerNamespace: 1
      smallJobSize: {{$SMALL_GROUP_SIZE}}
      smallJobsPerNamespace: 1

{{if not $IS_SMALL_CLUSTER}}
# BEGIN scheduler throughput
- module:
    path: modules/scheduler-throughput.yaml
    params:
      action: create
      namespaces: {{$namespaces}}
      replicasPerNamespace: 1
      schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
      schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
{{end}}

- module:
    path: modules/dns-k8s-hostnames.yaml
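# The block below renders only when CL2_EXEC_COMMAND is provided. A hypothetical override file
# (for example one passed via clusterloader2's --testoverrides flag; flag name assumed, values are placeholders) could set:
#   CL2_EXEC_COMMAND:
#   - "bash"
#   - "-c"
#   - "echo running custom step"
#   CL2_EXEC_TIMEOUT: "1800s"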
{{if $EXEC_COMMAND}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- name: Pausing APIAvailability measurement
  measurements:
  - Identifier: APIAvailability
    Method: APIAvailability
    Params:
      action: pause
{{end}}
- name: Exec command
  measurements:
  - Identifier: ExecCommand
    Method: Exec
    Params:
      timeout: {{$EXEC_TIMEOUT}}
      command:
      {{range $EXEC_COMMAND}}
      - {{.}}
      {{end}}
{{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
- name: Unpausing APIAvailability measurement
  measurements:
  - Identifier: APIAvailability
    Method: APIAvailability
    Params:
      action: unpause
{{end}}
- name: Sleep
  measurements:
  - Identifier: WaitAfterExec
    Method: Sleep
    Params:
      duration: {{$SLEEP_AFTER_EXEC_DURATION}}
{{end}}

{{if not $EXIT_AFTER_EXEC}}
{{if not $IS_SMALL_CLUSTER}}
- module:
    path: modules/scheduler-throughput.yaml
    params:
      action: delete
      namespaces: {{$namespaces}}
      replicasPerNamespace: 0
      schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
      schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
# END scheduler throughput
{{end}}

{{if not $IS_SMALL_CLUSTER}}
# TODO(kubernetes/perf-tests/issues/1024): We shouldn't have a dedicated module for measuring pod-startup-latency.
- module:
    path: modules/pod-startup-latency.yaml
    params:
      namespaces: {{$namespaces}}
      minPodsInSmallCluster: {{$MIN_PODS_IN_SMALL_CLUSTERS}}
      image: {{$latencyPodImage}}
{{end}}

- module:
    path: /modules/reconcile-objects.yaml
    params:
      actionName: "scale and update"
      namespaces: {{$namespaces}}
      {{if $RATE_LIMIT_POD_CREATION}}
      tuningSet: RandomizedScalingTimeLimited
      operationTimeout: 15m
      {{else}}
      tuningSet: default
      operationTimeout: {{AddInt (DivideInt $saturationTime 4) 900}}s
      {{end}}
      randomScaleFactor: {{$RANDOM_SCALE_FACTOR}}
      testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
      daemonSetImage: {{$latencyPodImage}}
      daemonSetEnv: "after update"
      daemonSetReplicas: 1
      bigDeploymentSize: {{$BIG_GROUP_SIZE}}
      bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
      mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
      mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
      smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
      smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
      smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
      smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
      mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
      mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
      bigJobSize: {{$BIG_GROUP_SIZE}}
      bigJobsPerNamespace: {{$bigJobsPerNamespace}}
      mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
      mediumJobsPerNamespace: 1
      smallJobSize: {{$SMALL_GROUP_SIZE}}
      smallJobsPerNamespace: 1

- module:
    path: /modules/reconcile-objects.yaml
    params:
      actionName: "delete"
      namespaces: {{$namespaces}}
      {{if $RATE_LIMIT_POD_CREATION}}
      tuningSet: RandomizedDeletionTimeLimited
      operationTimeout: 15m
      {{else}}
      tuningSet: default
      operationTimeout: {{AddInt $deletionTime 900}}s
      {{end}}
      testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
      daemonSetReplicas: 0
      bigDeploymentSize: {{$BIG_GROUP_SIZE}}
      bigDeploymentsPerNamespace: 0
      mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
      mediumDeploymentsPerNamespace: 0
      smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
      smallDeploymentsPerNamespace: 0
      smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
      smallStatefulSetsPerNamespace: 0
      mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
      mediumStatefulSetsPerNamespace: 0
      bigJobSize: {{$BIG_GROUP_SIZE}}
      bigJobsPerNamespace: 0
      mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
      mediumJobsPerNamespace: 0
      smallJobSize: {{$SMALL_GROUP_SIZE}}
      smallJobsPerNamespace: 0
      pvSmallStatefulSetSize: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
      pvMediumStatefulSetSize: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}

- module:
    path: /modules/configmaps-secrets.yaml
    params:
      actionName: delete
      tuningSet: default
      namespaces: {{$namespaces}}
      bigDeploymentsPerNamespace: 0
      mediumDeploymentsPerNamespace: 0
      smallDeploymentsPerNamespace: 0

- name: Deleting PriorityClass for DaemonSets
  phases:
  - replicasPerNamespace: 0
    tuningSet: Sequence
    objectBundle:
    - basename: daemonset-priorityclass
      objectTemplatePath: daemonset-priorityclass.yaml

- module:
    path: modules/services.yaml
    params:
      actionName: "Deleting"
      namespaces: {{$namespaces}}
      smallServicesPerNamespace: 0
      mediumServicesPerNamespace: 0
      bigServicesPerNamespace: 0
{{end}} # not EXIT_AFTER_EXEC

- module:
    path: /modules/measurements.yaml
    params:
      action: gather
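# A sketch of a possible invocation (binary name, flag names, and values are assumptions/placeholders,
# not prescribed by this config):
#   ./clusterloader --testconfig=testing/load/config.yaml --provider=gce \
#     --kubeconfig=$HOME/.kube/config --nodes=100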