in agents/bc_gan.py [0:0]
def __init__(self,
             state_dim,
             action_dim,
             max_action,
             device,
             discount,
             tau,
             lr=3e-4,
             ):
    """Build the generator (actor), its target copy, and the discriminator.

    Args:
        state_dim: Forwarded to ``ImplicitPolicy`` and ``Discriminator``.
        action_dim: Forwarded to both networks; also caps the noise
            dimension handed to the actor at ``min(action_dim, 10)``.
        max_action: Forwarded to ``ImplicitPolicy`` and stored on the agent.
        device: Device the noise source and both networks are placed on.
        discount: Stored as ``self.discount``; not used in the constructor.
        tau: Stored as ``self.tau``; not used in the constructor.
        lr: Learning rate shared by both Adam optimizers.
    """
    # Plain settings are only recorded here; nothing below reads them back.
    self.device = device
    self.max_action = max_action
    self.action_dim = action_dim
    self.discount = discount
    self.tau = tau
    # NOTE(review): presumably the number of generator updates per training
    # step -- confirm against the training loop.
    self.g_iter = 2

    # Both optimizers use the same Adam configuration.
    adam_settings = dict(lr=lr, betas=(0.4, 0.999))

    # Generator side: standard-normal noise feeds the implicit policy.
    latent_dim = min(action_dim, 10)
    self.noise = NormalNoise(device=device, mean=0.0, std=1.0)
    policy = ImplicitPolicy(
        state_dim, action_dim, max_action, self.noise, latent_dim, device
    )
    self.actor = policy.to(device)
    # The target starts as an exact deep copy of the freshly built actor.
    self.actor_target = copy.deepcopy(self.actor)
    self.actor_optimizer = torch.optim.Adam(
        self.actor.parameters(), **adam_settings
    )

    # Discriminator side: built after the actor so parameter initialisation
    # consumes random state in the same order as before.
    critic = Discriminator(state_dim=state_dim, action_dim=action_dim)
    self.discriminator = critic.to(device)
    self.discriminator_optimizer = torch.optim.Adam(
        self.discriminator.parameters(), **adam_settings
    )

    # Binary cross-entropy criterion kept on the agent as the adversarial loss.
    self.adversarial_loss = torch.nn.BCELoss()