in agents/bc_mle.py [0:0]
def train(self, replay_buffer, iterations, batch_size=100):
    """Fit the actor to replay-buffer actions by maximizing their log-likelihood.

    Each iteration samples one batch, evaluates
    ``self.actor.log_prob(state, action)`` and takes a single
    ``self.actor_optimizer`` step on the negative mean log-probability.

    Args:
        replay_buffer: Object whose ``sample(batch_size)`` returns a 5-tuple
            ``(state, action, next_state, reward, not_done)``. Only ``state``
            and ``action`` are used here.
        iterations: Number of gradient steps to run. When it is zero or
            negative, no step is taken and nothing is logged.
        batch_size: Batch size forwarded to ``replay_buffer.sample``.

    Returns:
        None. The loss of the last batch is recorded under ``'Actor Loss'``.
    """
    # Stays None when the loop body never runs, so the log call below can be
    # skipped instead of failing on an unbound local.
    actor_loss = None

    for _ in range(iterations):
        # Sample replay buffer / batch. The trailing three fields are part of
        # the buffer's return tuple but are not needed for this loss.
        state, action, _next_state, _reward, _not_done = replay_buffer.sample(batch_size)

        # Actor training: minimize the negative mean log-probability of the
        # sampled actions under the current actor.
        log_pi = self.actor.log_prob(state, action)
        actor_loss = -log_pi.mean()

        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

    # NOTE(review): assumes one log record per train() call, carrying the loss
    # of the final batch — confirm against the intended logging cadence.
    if actor_loss is not None:
        # detach() replaces the legacy `.data` accessor; the logged value is
        # the same NumPy scalar array.
        logger.record_tabular('Actor Loss', actor_loss.detach().cpu().numpy())