Commit

Update reward_normalizer.py
chirathyh authored Mar 22, 2024
1 parent 524b91b commit 026627f
Showing 1 changed file with 15 additions and 21 deletions.
36 changes: 15 additions & 21 deletions utils/reward_normalizer.py
@@ -9,12 +9,10 @@

class RunningMeanStd(nn.Module):
    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
-    def __init__(
-        self,
-        epsilon: "initial count (with mean=0 ,var=1)" = 1e-4,
-        shape: "unbatched shape of data" = (),
-        distributed: "whether to allreduce stats" = True,
-    ):
+    def __init__(self, epsilon=1e-4, shape=(), distributed=True):
+        # epsilon: initial count (with mean=0, var=1)
+        # shape: unbatched shape of data
+        # distributed: whether to allreduce stats
        super().__init__()
        self.register_buffer("mean", th.zeros(shape))
        self.register_buffer("var", th.ones(shape))
@@ -104,13 +102,11 @@ def transform_average(self, reward):
        return (reward - self.ret_rms.mean) #/ th.sqrt(self.ret_rms.var + self.epsilon)


-def backward_discounted_sum(
-    *,
-    prevret: "(th.Tensor[1, float]) value predictions",
-    reward: "(th.Tensor[1, float]) reward",
-    first: "(th.Tensor[1, bool]) mark beginning of episodes",
-    gamma: "(float)",
-):
+def backward_discounted_sum(prevret, reward, first, gamma):
+    # prevret: (th.Tensor[1, float]) value predictions
+    # reward: (th.Tensor[1, float]) reward
+    # first: (th.Tensor[1, bool]) mark beginning of episodes
+    # gamma: (float)
    first = first.to(dtype=th.float32)
    assert first.dim() == 2
    _nenv, nstep = reward.shape
@@ -120,14 +116,12 @@ def backward_discounted_sum
    return ret
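
Everything between the signature above and this return ret is collapsed out of the diff. Given the visible setup — (nenv, nstep) rewards and a first mask marking episode starts — a plausible sketch of the backward recurrence, as an assumption rather than the committed code:

import torch as th

def backward_discounted_sum(prevret, reward, first, gamma):
    first = first.to(dtype=th.float32)
    assert first.dim() == 2
    _nenv, nstep = reward.shape
    ret = th.zeros_like(reward)
    for t in range(nstep):
        # Zero out the carried return wherever first[:, t] == 1 (episode start).
        prevret = ret[:, t] = reward[:, t] + (1 - first[:, t]) * gamma * prevret
    return ret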


-def backward_average_sum(
-    *,
-    prevret: "(th.Tensor[1, float]) value predictions",
-    reward: "(th.Tensor[1, float]) reward",
-    first: "(th.Tensor[1, bool]) mark beginning of episodes",
-    gamma: "(float)",
-    ret_mean: "(float)",
-):
+def backward_average_sum(prevret, reward, first, gamma, ret_mean):
+    # prevret: (th.Tensor[1, float]) value predictions
+    # reward: (th.Tensor[1, float]) reward
+    # first: (th.Tensor[1, bool]) mark beginning of episodes
+    # gamma: (float)
+    # ret_mean: (float)
    first = first.to(dtype=th.float32)
    assert first.dim() == 2
    _nenv, nstep = reward.shape
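
As with the function above, the body of backward_average_sum is collapsed here; only the setup lines survive. A hypothetical completion, assuming it is an average-reward variant that centers each reward by ret_mean before accumulating (consistent with transform_average subtracting ret_rms.mean earlier in the file):

import torch as th

def backward_average_sum(prevret, reward, first, gamma, ret_mean):
    first = first.to(dtype=th.float32)
    assert first.dim() == 2
    _nenv, nstep = reward.shape
    ret = th.zeros_like(reward)
    for t in range(nstep):
        # Center the reward by the running return mean, then accumulate as above.
        prevret = ret[:, t] = (reward[:, t] - ret_mean) + (1 - first[:, t]) * gamma * prevret
    return ret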
