in agents/bc_w.py [0:0]
def train(self, replay_buffer, iterations, batch_size=100):
    """Run behavior-cloning training for *iterations* outer steps.

    Each outer step draws a fresh batch for a joint critic + actor update,
    then performs ``self.c_iter - 1`` additional critic-only updates, each on
    its own freshly sampled batch. After the loop, the losses from the final
    optimizer calls are written to the tabular logger.

    Args:
        replay_buffer: Buffer exposing ``sample(batch_size)`` returning a
            5-tuple whose first two elements are states and actions (the
            remaining three are ignored here).
        iterations: Number of outer training iterations.
        batch_size: Samples drawn per optimizer call. Defaults to 100.
    """
    for _ in range(iterations):
        # Fresh batch for the combined critic + actor step.
        states, actions, *_ = replay_buffer.sample(batch_size)
        critic_loss = self.optimize_c(states, actions)
        actor_loss = self.optimize_p(states, actions)
        # Remaining critic-only refinement steps, each on a new batch.
        for _ in range(self.c_iter - 1):
            states, actions, *_ = replay_buffer.sample(batch_size)
            critic_loss = self.optimize_c(states, actions)
    # NOTE(review): source indentation was flattened; logging is placed after
    # the loop (matching the unused loop index and once-per-call tabular
    # logging convention) — only the final iteration's losses are recorded.
    logger.record_tabular('Actor Loss', actor_loss)
    logger.record_tabular('Critic Loss', critic_loss)