in agents/diffusion.py [0:0]
def sample_last_few(self, state, nest_limit=5):
    """Sample actions, tracking gradients only through the last few steps.

    Starts from standard-normal noise of shape ``(batch, self.action_dim)``
    and calls ``self.p_sample(x, timesteps, state)`` once per timestep,
    counting down from ``self.n_timesteps - 1`` to ``0``.  Steps whose index
    is ``>= nest_limit`` run with autograd disabled; the remaining steps run
    in whatever grad mode the caller is in, so the returned tensor can carry
    a graph covering at most ``nest_limit`` calls to ``p_sample``.

    Args:
        state: Conditioning batch; only ``state.shape[0]`` (the batch size)
            is read here, the tensor itself is forwarded to ``p_sample``.
        nest_limit: Number of final timesteps (indices ``0 .. nest_limit-1``)
            that are allowed to record gradients.  Defaults to 5.

    Returns:
        Tensor of shape ``(batch, self.action_dim)`` on the device of
        ``self.betas``, clamped to ``[-self.max_action, self.max_action]``.
    """
    batch_size = state.shape[0]
    device = self.betas.device
    x = torch.randn((batch_size, self.action_dim), device=device)

    # Capture the caller's grad mode once so an enclosing ``torch.no_grad()``
    # is still honoured during the final steps (we never force grads on).
    ambient_grad = torch.is_grad_enabled()

    for i in reversed(range(self.n_timesteps)):
        timesteps = torch.full((batch_size,), i, device=device, dtype=torch.long)
        with torch.set_grad_enabled(ambient_grad and i < nest_limit):
            # NOTE(review): p_sample is presumably one reverse-diffusion
            # (denoising) step; it is defined outside this block.
            x = self.p_sample(x, timesteps, state)

    # Out-of-place clamp: ``x`` may be part of an autograd graph here, and an
    # in-place ``clamp_`` would invalidate any op that saved ``x`` for its
    # backward pass.
    return x.clamp(-self.max_action, self.max_action)