in clusterloader2/pkg/measurement/common/wait_for_jobs.go [149:190]
// gather waits, for at most timeout, until the keys recorded in w.finishedJobs
// are exactly the keys returned by w.jobKeys(), and then summarizes the outcome.
//
// It returns an error when the measurement has not been started, when
// w.jobKeys() fails, or when at least one expected job key has no entry in
// w.finishedJobs once polling is over. An error from the poll itself is only
// logged; the final verdict is derived from the contents of w.finishedJobs.
func (w *waitForFinishedJobsMeasurement) gather(timeout time.Duration) error {
	if !w.isRunning {
		return fmt.Errorf("%v: wait for finished jobs was not started", w)
	}
	klog.V(2).Infof("%v: waiting for finished jobs measurement...", w)

	jobKeys, err := w.jobKeys()
	if err != nil {
		return err
	}

	// allFinished reports whether the set of finished job keys currently
	// matches the expected set. It takes the lock only for the duration of a
	// single check so that w.finishedJobs is not read unguarded.
	allFinished := func() (bool, error) {
		w.lock.Lock()
		defer w.lock.Unlock()
		seen := make(sets.String, len(w.finishedJobs))
		for finishedKey := range w.finishedJobs {
			seen.Insert(finishedKey)
		}
		return jobKeys.Equal(seen), nil
	}
	pollErr := wait.Poll(checkFinishedJobsInterval, timeout, allFinished)
	if pollErr != nil {
		// Not fatal here: the per-job accounting below decides the result.
		klog.V(2).Infof("Timed out waiting for all jobs to finish: %v", pollErr)
	}

	// Hold the lock for the rest of the function while w.finishedJobs is read.
	w.lock.Lock()
	defer w.lock.Unlock()

	var completed, failed int
	timedOut := sets.NewString()
	for key := range jobKeys {
		state, found := w.finishedJobs[key]
		switch {
		case !found:
			timedOut.Insert(key)
		case state == batchv1.JobComplete:
			completed++
		case state == batchv1.JobFailed:
			failed++
		}
	}

	if pending := timedOut.Len(); pending != 0 {
		return fmt.Errorf("%d Jobs timed out: %s", pending, strings.Join(timedOut.List(), ", "))
	}

	finished := completed + failed
	klog.V(2).Infof("%v: %d/%d Jobs finished, %d completed, %d failed", w, finished, len(jobKeys), completed, failed)
	return nil
}