def train()

in agents/bc_mle.py [0:0]


    def train(self, replay_buffer, iterations, batch_size=100):
        """Fit the actor to buffered actions by maximum likelihood.

        Runs ``iterations`` gradient steps. Each step samples a batch from
        ``replay_buffer``, evaluates ``self.actor.log_prob(state, action)``
        and minimises the negative mean of that log-probability with
        ``self.actor_optimizer``.

        Args:
            replay_buffer: Object exposing ``sample(batch_size)``; the first
                two items of the returned sequence are used as the state
                and action batches, any further items are ignored.
            iterations: Number of optimisation steps to run. A value of
                zero or less performs no update and logs nothing.
            batch_size: Number of transitions requested per step.

        Returns:
            None. The loss of the final step only is written to the
            module-level ``logger`` under the key ``'Actor Loss'``.
        """
        # Stays None when the loop body never executes, so the logging step
        # below can tell "no update happened" apart from a real loss value.
        actor_loss = None

        for _ in range(iterations):
            # Only states and actions enter the likelihood objective; the
            # remaining fields of the sampled transition are not needed.
            state, action, *_ = replay_buffer.sample(batch_size)

            # Negative mean log-likelihood of the sampled actions.
            log_pi = self.actor.log_prob(state, action)
            actor_loss = -log_pi.mean()

            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

        if actor_loss is None:
            # iterations <= 0: there is no loss to report. Previously this
            # path failed with UnboundLocalError on the logging line.
            return

        # Logged once, after the loop: reflects the last step's loss only.
        logger.record_tabular('Actor Loss', actor_loss.cpu().data.numpy())