in agents/bc_diffusion.py [0:0]
def train(self, replay_buffer, iterations, batch_size=100):

    for it in range(iterations):
        # Sample a batch of transitions from the replay buffer
        state, action, next_state, reward, not_done = replay_buffer.sample(batch_size)

        # Pure behavior cloning: only the (state, action) pairs are used;
        # the loss is the diffusion actor's denoising objective conditioned on state
        loss = self.actor.loss(action, state)

        self.actor_optimizer.zero_grad()
        loss.backward()
        self.actor_optimizer.step()

        # Logging
        logger.record_tabular('Diffusion BC Loss', loss.item())
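The actor here is a state-conditioned diffusion model, and `self.actor.loss(action, state)` is its denoising objective, defined elsewhere in the repo rather than in this file. Below is a minimal sketch of what such a loss typically looks like, assuming a DDPM-style epsilon-prediction objective with a linear beta schedule; `TinyDiffusionActor`, its network, and its hyperparameters are illustrative stand-ins, not the repo's actual `Diffusion` class.

    import torch
    import torch.nn as nn

    class TinyDiffusionActor(nn.Module):
        """Sketch of a state-conditioned diffusion actor (illustrative only)."""

        def __init__(self, state_dim, action_dim, n_timesteps=100):
            super().__init__()
            self.n_timesteps = n_timesteps
            # Noise-prediction network: input is (noisy action, state, normalized timestep)
            self.eps_model = nn.Sequential(
                nn.Linear(action_dim + state_dim + 1, 256), nn.Mish(),
                nn.Linear(256, 256), nn.Mish(),
                nn.Linear(256, action_dim),
            )
            # Linear beta schedule and the cumulative alpha-bar terms for q(a_t | a_0)
            betas = torch.linspace(1e-4, 2e-2, n_timesteps)
            alpha_bars = torch.cumprod(1.0 - betas, dim=0)
            self.register_buffer('sqrt_ab', alpha_bars.sqrt())
            self.register_buffer('sqrt_one_minus_ab', (1.0 - alpha_bars).sqrt())

        def loss(self, action, state):
            # Sample a diffusion step and Gaussian noise for each batch element
            b = action.shape[0]
            t = torch.randint(0, self.n_timesteps, (b,), device=action.device)
            noise = torch.randn_like(action)
            # Forward-diffuse the clean action to step t
            a_t = self.sqrt_ab[t, None] * action + self.sqrt_one_minus_ab[t, None] * noise
            # Predict the injected noise from (a_t, state, t) and regress onto it
            t_in = t.float().unsqueeze(-1) / self.n_timesteps
            pred = self.eps_model(torch.cat([a_t, state, t_in], dim=-1))
            return nn.functional.mse_loss(pred, noise)

Minimizing this loss trains the actor to reverse the noising process on dataset actions, so sampling from the trained diffusion model conditioned on a state reproduces the behavior policy, which is exactly what the `train` loop above optimizes, one gradient step per iteration.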