def train()

in agents/bc_w.py [0:0]


    def train(self, replay_buffer, iterations, batch_size=100):

        for it in range(iterations):
            # Sample a batch from the replay buffer; only (state, action)
            # pairs are used here, the remaining fields are discarded.
            state, action, _, _, _ = replay_buffer.sample(batch_size)

            # One critic update followed by one actor (policy) update.
            critic_loss = self.optimize_c(state, action)
            actor_loss = self.optimize_p(state, action)

            # Extra critic updates: in total the critic is optimized
            # self.c_iter times for every single actor update.
            for _ in range(self.c_iter - 1):
                state, action, _, _, _ = replay_buffer.sample(batch_size)
                critic_loss = self.optimize_c(state, action)

        # Log the losses from the final training iteration.
        logger.record_tabular('Actor Loss', actor_loss)
        logger.record_tabular('Critic Loss', critic_loss)
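
For context, a minimal sketch of how this method might be driven. The ReplayBuffer below is a stand-in, not the repository's actual buffer: all that train() requires is a sample(batch_size) method returning a five-tuple whose first two elements are states and actions, so the remaining field order (next_state, reward, done) is an assumption.

    # Hypothetical replay buffer compatible with train(); field order beyond
    # (state, action) is assumed, since train() discards those fields.
    import numpy as np

    class ReplayBuffer:
        def __init__(self, state_dim, action_dim, max_size=int(1e6)):
            self.state = np.zeros((max_size, state_dim), dtype=np.float32)
            self.action = np.zeros((max_size, action_dim), dtype=np.float32)
            self.next_state = np.zeros((max_size, state_dim), dtype=np.float32)
            self.reward = np.zeros((max_size, 1), dtype=np.float32)
            self.done = np.zeros((max_size, 1), dtype=np.float32)
            self.ptr, self.size, self.max_size = 0, 0, max_size

        def add(self, s, a, s2, r, d):
            # Overwrite the oldest entry once the buffer is full.
            self.state[self.ptr], self.action[self.ptr] = s, a
            self.next_state[self.ptr] = s2
            self.reward[self.ptr], self.done[self.ptr] = r, d
            self.ptr = (self.ptr + 1) % self.max_size
            self.size = min(self.size + 1, self.max_size)

        def sample(self, batch_size):
            idx = np.random.randint(0, self.size, size=batch_size)
            return (self.state[idx], self.action[idx], self.next_state[idx],
                    self.reward[idx], self.done[idx])

    # Hypothetical usage, where `agent` is an instance of the class that
    # defines train() in agents/bc_w.py:
    # agent.train(replay_buffer, iterations=1000, batch_size=100)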