Module AssetAllocator.algorithms.REINFORCE.normalized_actions
Expand source code
import gym
from gym import spaces
import numpy as np
class NormalizedActions(gym.ActionWrapper):
    """Action wrapper converting between [-1, 1] and ``[low, high]``.

    ``low`` and ``high`` are read from ``self.action_space``.
    """

    def action(self, action):
        """Rescale ``action`` from [-1, 1] to ``[low, high]``.

        Args:
            action: Action expressed in the normalized [-1, 1] range.

        Returns:
            The action mapped linearly onto ``[low, high]``.
        """
        space = self.action_space
        rescaled = (action + 1) / 2  # [-1, 1] => [0, 1]
        rescaled *= space.high - space.low
        rescaled += space.low
        return rescaled

    def reverse_action(self, action):
        """Rescale ``action`` from ``[low, high]`` back to [-1, 1].

        Args:
            action: Action expressed in the ``[low, high]`` range.

        Returns:
            A new value in the normalized range; the argument is not
            modified.
        """
        low, high = self.action_space.low, self.action_space.high
        # Out-of-place arithmetic keeps the caller's array untouched and
        # also works for integer-typed inputs, where ``/=`` would raise.
        return (action - low) / (high - low) * 2 - 1
# https://github.com/google-research/google-research/blob/master/algae_dice/wrappers/normalize_action_wrapper.py
class NormalizeBoxActionWrapper(gym.ActionWrapper):
    """Rescale the action space of the environment."""

    def __init__(self, env):
        """Wrap ``env`` after checking that its action space is a Box.

        Args:
            env: Environment to wrap; its ``max_episode_steps`` attribute is
                copied onto the wrapper as ``_max_episode_steps``.

        Raises:
            ValueError: If ``env.action_space`` is not a ``spaces.Box``.
        """
        uses_box = isinstance(env.action_space, spaces.Box)
        if not uses_box:
            raise ValueError('env %s does not use spaces.Box.' % str(env))
        super().__init__(env)
        self._max_episode_steps = env.max_episode_steps

    def action(self, action):
        """Map ``action`` from [-1, 1] to the wrapped env's bounds, clipped."""
        space = self.env.action_space
        low, high = space.low, space.high
        span = high - low
        unclipped = low + (action + 1.0) * span / 2.0
        return np.clip(unclipped, low, high)

    def reverse_action(self, scaled_action):
        """Map ``scaled_action`` from the wrapped env's bounds to [-1, 1]."""
        space = self.env.action_space
        low, high = space.low, space.high
        offset = scaled_action - low
        return offset * 2.0 / (high - low) - 1.0
def check_and_normalize_box_actions(env):
    """Wrap env to normalize actions if [low, high] != [-1, 1].

    Args:
        env: Environment whose ``action_space`` is inspected.

    Returns:
        ``NormalizeBoxActionWrapper(env)`` when the action space is a
        ``spaces.Box`` whose bounds differ from [-1, 1] by more than 1e-6 in
        any component; otherwise ``env`` itself, unchanged.
    """
    space = env.action_space
    if not isinstance(space, spaces.Box):
        # Only read low/high once the space is known to be a Box; other
        # space types may not define these attributes.
        return env

    low, high = space.low, space.high
    if (np.abs(low + np.ones_like(low)).max() > 1e-6 or
            np.abs(high - np.ones_like(high)).max() > 1e-6):
        print('Normalizing environment actions.')
        return NormalizeBoxActionWrapper(env)

    # Environment does not need to be normalized.
    return env
Functions
def check_and_normalize_box_actions(env)
-
Wrap env to normalize actions if [low, high] != [-1, 1].
Expand source code
def check_and_normalize_box_actions(env):
    """Wrap env to normalize actions if [low, high] != [-1, 1].

    Args:
        env: Environment whose ``action_space`` is inspected.

    Returns:
        ``NormalizeBoxActionWrapper(env)`` when the action space is a
        ``spaces.Box`` whose bounds differ from [-1, 1] by more than 1e-6 in
        any component; otherwise ``env`` itself, unchanged.
    """
    space = env.action_space
    if not isinstance(space, spaces.Box):
        # Only read low/high once the space is known to be a Box; other
        # space types may not define these attributes.
        return env

    low, high = space.low, space.high
    if (np.abs(low + np.ones_like(low)).max() > 1e-6 or
            np.abs(high - np.ones_like(high)).max() > 1e-6):
        print('Normalizing environment actions.')
        return NormalizeBoxActionWrapper(env)

    # Environment does not need to be normalized.
    return env
Classes
class NormalizeBoxActionWrapper (env)
-
Rescale the action space of the environment.
Expand source code
class NormalizeBoxActionWrapper(gym.ActionWrapper):
    """Rescale the action space of the environment."""

    def __init__(self, env):
        """Wrap ``env`` after checking that its action space is a Box.

        Args:
            env: Environment to wrap; its ``max_episode_steps`` attribute is
                copied onto the wrapper as ``_max_episode_steps``.

        Raises:
            ValueError: If ``env.action_space`` is not a ``spaces.Box``.
        """
        uses_box = isinstance(env.action_space, spaces.Box)
        if not uses_box:
            raise ValueError('env %s does not use spaces.Box.' % str(env))
        super().__init__(env)
        self._max_episode_steps = env.max_episode_steps

    def action(self, action):
        """Map ``action`` from [-1, 1] to the wrapped env's bounds, clipped."""
        space = self.env.action_space
        low, high = space.low, space.high
        span = high - low
        unclipped = low + (action + 1.0) * span / 2.0
        return np.clip(unclipped, low, high)

    def reverse_action(self, scaled_action):
        """Map ``scaled_action`` from the wrapped env's bounds to [-1, 1]."""
        space = self.env.action_space
        low, high = space.low, space.high
        offset = scaled_action - low
        return offset * 2.0 / (high - low) - 1.0
Ancestors
- gym.core.ActionWrapper
- gym.core.Wrapper
- gym.core.Env
Methods
def action(self, action)
-
Expand source code
def action(self, action):
    """Map ``action`` from [-1, 1] to the wrapped env's bounds, clipped.

    Args:
        action: Action expressed in the normalized [-1, 1] range.

    Returns:
        The action rescaled to ``[low, high]`` of ``self.env.action_space``
        and clipped to those bounds.
    """
    space = self.env.action_space
    low, high = space.low, space.high
    span = high - low
    # rescale the action
    unclipped = low + (action + 1.0) * span / 2.0
    return np.clip(unclipped, low, high)
def reverse_action(self, scaled_action)
-
Expand source code
def reverse_action(self, scaled_action):
    """Map ``scaled_action`` from the wrapped env's bounds to [-1, 1].

    Args:
        scaled_action: Action expressed in ``[low, high]`` of
            ``self.env.action_space``.

    Returns:
        The action rescaled linearly to the normalized range.
    """
    space = self.env.action_space
    low, high = space.low, space.high
    offset = scaled_action - low
    return offset * 2.0 / (high - low) - 1.0
class NormalizedActions (env)
-
Wraps the environment to allow a modular transformation.
This class is the base class for all wrappers. The subclass could override some methods to change the behavior of the original environment without touching the original code.
Note
Don't forget to call `super().__init__(env)` if the subclass overrides `__init__`.
Expand source code
class NormalizedActions(gym.ActionWrapper):
    """Action wrapper converting between [-1, 1] and ``[low, high]``.

    ``low`` and ``high`` are read from ``self.action_space``.
    """

    def action(self, action):
        """Rescale ``action`` from [-1, 1] to ``[low, high]``.

        Args:
            action: Action expressed in the normalized [-1, 1] range.

        Returns:
            The action mapped linearly onto ``[low, high]``.
        """
        space = self.action_space
        rescaled = (action + 1) / 2  # [-1, 1] => [0, 1]
        rescaled *= space.high - space.low
        rescaled += space.low
        return rescaled

    def reverse_action(self, action):
        """Rescale ``action`` from ``[low, high]`` back to [-1, 1].

        Args:
            action: Action expressed in the ``[low, high]`` range.

        Returns:
            A new value in the normalized range; the argument is not
            modified.
        """
        low, high = self.action_space.low, self.action_space.high
        # Out-of-place arithmetic keeps the caller's array untouched and
        # also works for integer-typed inputs, where ``/=`` would raise.
        return (action - low) / (high - low) * 2 - 1
Ancestors
- gym.core.ActionWrapper
- gym.core.Wrapper
- gym.core.Env
Methods
def action(self, action)
-
Expand source code
def action(self, action):
    """Rescale ``action`` from [-1, 1] to ``[low, high]``.

    Args:
        action: Action expressed in the normalized [-1, 1] range.

    Returns:
        The action mapped linearly onto the bounds of ``self.action_space``.
    """
    space = self.action_space
    rescaled = (action + 1) / 2  # [-1, 1] => [0, 1]
    rescaled *= space.high - space.low
    rescaled += space.low
    return rescaled
def reverse_action(self, action)
-
Expand source code
def reverse_action(self, action):
    """Rescale ``action`` from ``[low, high]`` back to [-1, 1].

    Args:
        action: Action expressed in the bounds of ``self.action_space``.

    Returns:
        A new value in the normalized range; the argument is not modified.
    """
    low, high = self.action_space.low, self.action_space.high
    # Out-of-place arithmetic keeps the caller's array untouched and also
    # works for integer-typed inputs, where ``/=`` would raise.
    return (action - low) / (high - low) * 2 - 1