clusterloader2/testing/density/config.yaml (237 lines of code) (raw):

# ASSUMPTIONS: # - Underlying cluster should have 100+ nodes. # - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100). #Constants {{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}} # Cater for the case where the number of nodes is less than nodes per namespace. See https://github.com/kubernetes/perf-tests/issues/887 {{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}} {{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}} {{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}} {{$SCHEDULER_THROUGHPUT_THRESHOLD := DefaultParam .CL2_SCHEDULER_THROUGHPUT_THRESHOLD 0}} # LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for 1-core 4GB node. # Increasing allocation of both memory and cpu by 10% # decreases the value of priority function in scheduler by one point. # This results in decreased probability of choosing the same node again. {{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}} {{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}} {{$MIN_LATENCY_PODS := DefaultParam .MIN_LATENCY_PODS 500}} {{$MIN_SATURATION_PODS_TIMEOUT := 180}} {{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}} {{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}} {{$ENABLE_CLUSTER_OOMS_TRACKER := DefaultParam .CL2_ENABLE_CLUSTER_OOMS_TRACKER true}} {{$CLUSTER_OOMS_IGNORED_PROCESSES := DefaultParam .CL2_CLUSTER_OOMS_IGNORED_PROCESSES ""}} {{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}} {{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK true}} {{$RESTART_COUNT_THRESHOLD_OVERRIDES:= DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}} {{$ALLOWED_SLOW_API_CALLS := DefaultParam .CL2_ALLOWED_SLOW_API_CALLS 0}} {{$ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT true}} #Variables {{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} {{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}} {{$totalPods := MultiplyInt $podsPerNamespace $namespaces}} {{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}} {{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}} {{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}} # saturationDeploymentHardTimeout must be at least 20m to make sure that ~10m node # failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711 {{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}} {{$saturationDeploymentSpec := DefaultParam .SATURATION_DEPLOYMENT_SPEC "deployment.yaml"}} {{$latencyDeploymentSpec := DefaultParam .LATENCY_DEPLOYMENT_SPEC "deployment.yaml"}} # Probe measurements shared parameter {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT := DefaultParam .CL2_PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT "15m"}} name: density namespace: number: {{$namespaces}} tuningSets: - name: Uniform5qps qpsLoad: qps: 5 {{if $ENABLE_CHAOSMONKEY}} chaosMonkey: nodeFailure: failureRate: 0.01 interval: 1m jitterFactor: 10.0 simulatedDowntime: 10m {{end}} steps: - name: Starting measurements measurements: - Identifier: APIResponsivenessPrometheus Method: APIResponsivenessPrometheus Params: action: start - Identifier: APIResponsivenessPrometheusSimple Method: APIResponsivenessPrometheus Params: action: start # TODO(oxddr): figure out how many probers to run in function of cluster - Identifier: InClusterNetworkLatency Method: InClusterNetworkLatency Params: action: start checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}} replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} - Identifier: DnsLookupLatency Method: DnsLookupLatency Params: action: start checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}} replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}} - Identifier: TestMetrics Method: TestMetrics Params: action: start resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}} systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}} clusterOOMsIgnoredProcesses: {{$CLUSTER_OOMS_IGNORED_PROCESSES}} restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}} enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}} - name: Starting saturation pod measurements measurements: - Identifier: SaturationPodStartupLatency Method: PodStartupLatency Params: action: start labelSelector: group = saturation threshold: {{$saturationDeploymentTimeout}}s - Identifier: WaitForRunningSaturationDeployments Method: WaitForControlledPodsRunning Params: action: start apiVersion: apps/v1 kind: Deployment labelSelector: group = saturation operationTimeout: {{$saturationDeploymentHardTimeout}}s - Identifier: SchedulingThroughput Method: SchedulingThroughput Params: action: start labelSelector: group = saturation - name: Creating saturation pods phases: - namespaceRange: min: 1 max: {{$namespaces}} replicasPerNamespace: 1 tuningSet: Uniform5qps objectBundle: - basename: saturation-deployment objectTemplatePath: {{$saturationDeploymentSpec}} templateFillMap: Replicas: {{$podsPerNamespace}} Group: saturation CpuRequest: 1m MemoryRequest: 10M - name: Waiting for saturation pods to be running measurements: - Identifier: WaitForRunningSaturationDeployments Method: WaitForControlledPodsRunning Params: action: gather - name: Collecting saturation pod measurements measurements: - Identifier: SaturationPodStartupLatency Method: PodStartupLatency Params: action: gather - Identifier: SchedulingThroughput Method: SchedulingThroughput Params: action: gather enableViolations: {{$ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT}} threshold: {{$SCHEDULER_THROUGHPUT_THRESHOLD}} - name: Starting latency pod measurements measurements: - Identifier: PodStartupLatency Method: PodStartupLatency Params: action: start labelSelector: group = latency - Identifier: WaitForRunningLatencyDeployments Method: WaitForControlledPodsRunning Params: action: start apiVersion: apps/v1 kind: Deployment labelSelector: group = latency operationTimeout: 15m - name: Creating latency pods phases: - namespaceRange: min: 1 max: {{$namespaces}} replicasPerNamespace: {{$latencyReplicas}} tuningSet: Uniform5qps objectBundle: - basename: latency-deployment objectTemplatePath: {{$latencyDeploymentSpec}} templateFillMap: Replicas: 1 Group: latency CpuRequest: {{$LATENCY_POD_CPU}}m MemoryRequest: {{$LATENCY_POD_MEMORY}}M - name: Waiting for latency pods to be running measurements: - Identifier: WaitForRunningLatencyDeployments Method: WaitForControlledPodsRunning Params: action: gather - name: Deleting latency pods phases: - namespaceRange: min: 1 max: {{$namespaces}} replicasPerNamespace: 0 tuningSet: Uniform5qps objectBundle: - basename: latency-deployment objectTemplatePath: {{$latencyDeploymentSpec}} - name: Waiting for latency pods to be deleted measurements: - Identifier: WaitForRunningLatencyDeployments Method: WaitForControlledPodsRunning Params: action: gather - name: Collecting pod startup latency measurements: - Identifier: PodStartupLatency Method: PodStartupLatency Params: action: gather - name: Deleting saturation pods phases: - namespaceRange: min: 1 max: {{$namespaces}} replicasPerNamespace: 0 tuningSet: Uniform5qps objectBundle: - basename: saturation-deployment objectTemplatePath: {{$saturationDeploymentSpec}} - name: Waiting for saturation pods to be deleted measurements: - Identifier: WaitForRunningSaturationDeployments Method: WaitForControlledPodsRunning Params: action: gather - name: Collecting measurements measurements: - Identifier: APIResponsivenessPrometheusSimple Method: APIResponsivenessPrometheus Params: action: gather enableViolations: true useSimpleLatencyQuery: true summaryName: APIResponsivenessPrometheus_simple allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}} {{if not $USE_SIMPLE_LATENCY_QUERY}} - Identifier: APIResponsivenessPrometheus Method: APIResponsivenessPrometheus Params: action: gather allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}} {{end}} - Identifier: InClusterNetworkLatency Method: InClusterNetworkLatency Params: action: gather - Identifier: DnsLookupLatency Method: DnsLookupLatency Params: action: gather - Identifier: TestMetrics Method: TestMetrics Params: action: gather systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}} clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}} restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}} enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}