in utils/data_sampler.py:
def __init__(self, data, device, reward_tune='no'):
    # Convert the offline dataset's transition arrays (NumPy) into float32 CPU tensors.
    self.state = torch.from_numpy(data['observations']).float()
    self.action = torch.from_numpy(data['actions']).float()
    self.next_state = torch.from_numpy(data['next_observations']).float()
    reward = torch.from_numpy(data['rewards']).view(-1, 1).float()
    # Continuation mask: 1 for non-terminal transitions, 0 at episode ends.
    self.not_done = 1. - torch.from_numpy(data['terminals']).view(-1, 1).float()

    self.size = self.state.shape[0]
    self.state_dim = self.state.shape[1]
    self.action_dim = self.action.shape[1]

    self.device = device

    # Optional reward rescaling, following the conventions named by each scheme.
    if reward_tune == 'normalize':
        reward = (reward - reward.mean()) / reward.std()
    elif reward_tune == 'iql_antmaze':
        reward = reward - 1.0
    elif reward_tune == 'iql_locomotion':
        reward = iql_normalize(reward, self.not_done)  # helper not shown in this excerpt
    elif reward_tune == 'cql_antmaze':
        reward = (reward - 0.5) * 4.0
    self.reward = reward
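The iql_normalize helper is called above but not defined in this excerpt. A minimal sketch of what it plausibly does, assuming IQL's locomotion reward scaling (divide rewards by the range of episode returns, then multiply by 1000); the exact constants and the real signature should be checked against the repository:

import torch

def iql_normalize(reward, not_done):
    # Accumulate per-episode returns, using the not_done mask to detect episode ends.
    episode_returns, ep_ret = [], 0.0
    for r, nd in zip(reward.view(-1), not_done.view(-1)):
        ep_ret += r.item()
        if nd.item() == 0.0:  # terminal transition closes the episode
            episode_returns.append(ep_ret)
            ep_ret = 0.0
    # Rescale rewards by the spread of episode returns.
    ret_range = max(episode_returns) - min(episode_returns)
    return reward / ret_range * 1000.0

The constructor stores self.device but keeps every tensor on the CPU, which suggests that only sampled minibatches are moved to the device. A possible companion sample method, assuming uniform random minibatch sampling (the method name and return order are assumptions, not shown in the excerpt):

def sample(self, batch_size):
    # Draw indices uniformly at random and ship just the minibatch to the target device.
    ind = torch.randint(0, self.size, size=(batch_size,))
    return (
        self.state[ind].to(self.device),
        self.action[ind].to(self.device),
        self.next_state[ind].to(self.device),
        self.reward[ind].to(self.device),
        self.not_done[ind].to(self.device),
    )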