Module AssetAllocator.algorithms.REINFORCE.normalized_actions

Expand source code
import gym
from gym import spaces
import numpy as np


class NormalizedActions(gym.ActionWrapper):
    """Convert actions between the [-1, 1] range and the action space bounds.

    The bounds are read from ``self.action_space.low`` and
    ``self.action_space.high`` on every call.
    """

    def action(self, action):
        """Map ``action`` from [-1, 1] to [low, high].

        -1 maps to ``low`` and +1 maps to ``high``. Values outside [-1, 1]
        are not clipped.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action + 1) / 2  # [-1, 1] => [0, 1]
        return unit * (high - low) + low

    def reverse_action(self, action):
        """Map ``action`` from [low, high] back to [-1, 1].

        This is the inverse of :meth:`action`. The argument is left
        untouched: the result is computed into a new value rather than
        with in-place operators, so the caller's array is not modified and
        integer-typed inputs are accepted.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action - low) / (high - low)  # [low, high] => [0, 1]
        return unit * 2 - 1

# https://github.com/google-research/google-research/blob/master/algae_dice/wrappers/normalize_action_wrapper.py
class NormalizeBoxActionWrapper(gym.ActionWrapper):
  """Action wrapper mapping [-1, 1] actions onto the wrapped env's Box bounds."""

  def __init__(self, env):
    """Wrap `env`.

    Args:
      env: environment to wrap; it is expected to expose `action_space`
        and `max_episode_steps`.

    Raises:
      ValueError: if `env.action_space` is not a `spaces.Box`.
    """
    is_box = isinstance(env.action_space, spaces.Box)
    if not is_box:
      raise ValueError('env %s does not use spaces.Box.' % str(env))
    super(NormalizeBoxActionWrapper, self).__init__(env)
    # Mirror the wrapped env's episode length on the wrapper itself.
    self._max_episode_steps = env.max_episode_steps

  def action(self, action):
    """Return `action` rescaled from [-1, 1] to [low, high], clipped to the bounds."""
    space = self.env.action_space
    lower, upper = space.low, space.high
    span = upper - lower
    shifted = action + 1.0
    rescaled = lower + shifted * span / 2.0
    return np.clip(rescaled, lower, upper)

  def reverse_action(self, scaled_action):
    """Return `scaled_action` mapped from [low, high] back to [-1, 1]."""
    space = self.env.action_space
    lower, upper = space.low, space.high
    offset = scaled_action - lower
    span = upper - lower
    return offset * 2.0 / span - 1.0


def check_and_normalize_box_actions(env):
  """Wrap env to normalize actions if [low, high] != [-1, 1].

  Args:
    env: environment exposing an `action_space` attribute.

  Returns:
    A `NormalizeBoxActionWrapper` around `env` when its action space is a
    `spaces.Box` whose bounds differ from [-1, 1] by more than 1e-6 in any
    component; otherwise `env` itself, unchanged.
  """
  action_space = env.action_space

  # Read `low`/`high` only after the type check, so that spaces of another
  # type are passed through instead of failing on a missing attribute.
  if not isinstance(action_space, spaces.Box):
    return env

  low, high = action_space.low, action_space.high
  if (np.abs(low + np.ones_like(low)).max() > 1e-6 or
      np.abs(high - np.ones_like(high)).max() > 1e-6):
    print('Normalizing environment actions.')
    return NormalizeBoxActionWrapper(env)

  # Environment does not need to be normalized.
  return env

Functions

def check_and_normalize_box_actions(env)

Wrap env to normalize actions if [low, high] != [-1, 1].

Expand source code
def check_and_normalize_box_actions(env):
  """Wrap env to normalize actions if [low, high] != [-1, 1].

  Args:
    env: environment exposing an `action_space` attribute.

  Returns:
    A `NormalizeBoxActionWrapper` around `env` when its action space is a
    `spaces.Box` whose bounds differ from [-1, 1] by more than 1e-6 in any
    component; otherwise `env` itself, unchanged.
  """
  action_space = env.action_space

  # Read `low`/`high` only after the type check, so that spaces of another
  # type are passed through instead of failing on a missing attribute.
  if not isinstance(action_space, spaces.Box):
    return env

  low, high = action_space.low, action_space.high
  if (np.abs(low + np.ones_like(low)).max() > 1e-6 or
      np.abs(high - np.ones_like(high)).max() > 1e-6):
    print('Normalizing environment actions.')
    return NormalizeBoxActionWrapper(env)

  # Environment does not need to be normalized.
  return env

Classes

class NormalizeBoxActionWrapper (env)

Rescale the action space of the environment.

Expand source code
class NormalizeBoxActionWrapper(gym.ActionWrapper):
  """Action wrapper mapping [-1, 1] actions onto the wrapped env's Box bounds."""

  def __init__(self, env):
    """Wrap `env`.

    Args:
      env: environment to wrap; it is expected to expose `action_space`
        and `max_episode_steps`.

    Raises:
      ValueError: if `env.action_space` is not a `spaces.Box`.
    """
    is_box = isinstance(env.action_space, spaces.Box)
    if not is_box:
      raise ValueError('env %s does not use spaces.Box.' % str(env))
    super(NormalizeBoxActionWrapper, self).__init__(env)
    # Mirror the wrapped env's episode length on the wrapper itself.
    self._max_episode_steps = env.max_episode_steps

  def action(self, action):
    """Return `action` rescaled from [-1, 1] to [low, high], clipped to the bounds."""
    space = self.env.action_space
    lower, upper = space.low, space.high
    span = upper - lower
    shifted = action + 1.0
    rescaled = lower + shifted * span / 2.0
    return np.clip(rescaled, lower, upper)

  def reverse_action(self, scaled_action):
    """Return `scaled_action` mapped from [low, high] back to [-1, 1]."""
    space = self.env.action_space
    lower, upper = space.low, space.high
    offset = scaled_action - lower
    span = upper - lower
    return offset * 2.0 / span - 1.0

Ancestors

  • gym.core.ActionWrapper
  • gym.core.Wrapper
  • gym.core.Env

Methods

def action(self, action)
Expand source code
def action(self, action):
  """Return `action` rescaled from [-1, 1] to [low, high], clipped to the bounds.

  The bounds are read from `self.env.action_space`.
  """
  space = self.env.action_space
  lower, upper = space.low, space.high
  span = upper - lower
  shifted = action + 1.0
  rescaled = lower + shifted * span / 2.0
  return np.clip(rescaled, lower, upper)
def reverse_action(self, scaled_action)
Expand source code
def reverse_action(self, scaled_action):
  """Return `scaled_action` mapped from [low, high] back to [-1, 1].

  The bounds are read from `self.env.action_space`.
  """
  space = self.env.action_space
  lower, upper = space.low, space.high
  offset = scaled_action - lower
  span = upper - lower
  return offset * 2.0 / span - 1.0
class NormalizedActions (env)

Wraps the environment to allow a modular transformation.

This class is the base class for all wrappers. The subclass could override some methods to change the behavior of the original environment without touching the original code.

Note

Don't forget to call `super().__init__(env)` if the subclass overrides `__init__`.

Expand source code
class NormalizedActions(gym.ActionWrapper):
    """Convert actions between the [-1, 1] range and the action space bounds.

    The bounds are read from ``self.action_space.low`` and
    ``self.action_space.high`` on every call.
    """

    def action(self, action):
        """Map ``action`` from [-1, 1] to [low, high].

        -1 maps to ``low`` and +1 maps to ``high``. Values outside [-1, 1]
        are not clipped.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action + 1) / 2  # [-1, 1] => [0, 1]
        return unit * (high - low) + low

    def reverse_action(self, action):
        """Map ``action`` from [low, high] back to [-1, 1].

        This is the inverse of :meth:`action`. The argument is left
        untouched: the result is computed into a new value rather than
        with in-place operators, so the caller's array is not modified and
        integer-typed inputs are accepted.
        """
        low = self.action_space.low
        high = self.action_space.high
        unit = (action - low) / (high - low)  # [low, high] => [0, 1]
        return unit * 2 - 1

Ancestors

  • gym.core.ActionWrapper
  • gym.core.Wrapper
  • gym.core.Env

Methods

def action(self, action)
Expand source code
def action(self, action):
    """Map ``action`` from [-1, 1] to the [low, high] range of ``self.action_space``."""
    bounds = self.action_space
    # Shift and halve first so that -1 -> 0 and +1 -> 1; this yields a new
    # value, so the in-place steps below do not touch the caller's object.
    unit = (action + 1) / 2
    unit *= (bounds.high - bounds.low)
    unit += bounds.low
    return unit
def reverse_action(self, action)
Expand source code
def reverse_action(self, action):
    """Map ``action`` from [low, high] of ``self.action_space`` back to [-1, 1].

    ``low`` maps to -1 and ``high`` maps to +1. The argument is left
    untouched: the result is computed into a new value rather than with
    in-place operators, so the caller's array is not modified and
    integer-typed inputs are accepted.
    """
    low = self.action_space.low
    high = self.action_space.high
    unit = (action - low) / (high - low)  # [low, high] => [0, 1]
    return unit * 2 - 1