def progress()

in core/train_pipeline.py [0:0]


  def progress(self, dataloader_iter: Iterator[In]) -> Out:
    """Run one pipelined step over the current batch and return the model output.

    The host-to-device copy of the *next* batch is launched non-blocking on
    ``self._memcpy_stream`` before ``optimizer.step()`` so the two can overlap.
    Training-only work (zero_grad / backward / step) is skipped when the model
    is in eval mode. Propagates ``StopIteration`` once ``dataloader_iter`` is
    exhausted.
    """
    # Lazily attach to the dataloader on the first call.
    if not self._connected:
      self._connect(dataloader_iter)

    # Pull the batch that will be consumed on the *next* call.
    with record_function("## next_batch ##"):
      prefetched = next(dataloader_iter)

    batch = self._cur_batch
    assert batch is not None

    if self._model.training:
      with record_function("## zero_grad ##"):
        self._optimizer.zero_grad()

    # Make sure the async copy of the current batch has landed on device
    # before the forward pass reads it.
    with record_function("## wait_for_batch ##"):
      _wait_for_batch(batch, self._memcpy_stream)

    with record_function("## forward ##"):
      (losses, out) = self._model(batch)

    if self._model.training:
      with record_function("## backward ##"):
        torch.sum(losses, dim=0).backward()

    # Kick off the device copy of the prefetched batch on the side stream;
    # it proceeds while the optimizer step below runs on the default stream.
    self._cur_batch = prefetched
    with record_function("## copy_batch_to_gpu ##"), torch.cuda.stream(
        self._memcpy_stream
    ):
      self._cur_batch = _to_device(prefetched, self._device, non_blocking=True)

    if self._model.training:
      with record_function("## optimizer ##"):
        self._optimizer.step()

    return out