Module `AssetAllocator.algorithms.DDPG.OU_Noise`

Expand source code

import numpy as np

class OrnsteinUhlenbeckNoise():
    """
    Ornstein-Uhlenbeck (OU) process used to generate noise that encourages the agent to explore the environment.

    Based on:
    1. https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab (Formula to be implemented -- Matlab)
    2. https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py (Implementation used in OpenAI Baselines)
    """
    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2, x_start=None):
        """Initializes the Ornstein-Uhlenbeck Noise Process

        Args:
            mu (float or np.ndarray): mean value around which the random values are generated.
                Its shape determines the shape of every generated sample.
            sigma (float, optional): amount of noise to be applied to the process. Defaults to 0.2.
            theta (float, optional): amount of frictional force to be applied. Defaults to 0.15.
            dt (float, optional): time step used in the update formula. Defaults to 1e-2.
            x_start (optional): the point from where the random walk is started.
                Defaults to None, in which case the walk starts from zeros shaped like `mu`.
        """
        self.mu = mu  # mean value around which the random values are generated
        self.sigma = sigma  # amount of noise to be applied to the process
        self.theta = theta  # amount of frictional force to be applied
        self.dt = dt  # time step of the discretised process
        self.x_start = x_start  # the point from where the random walk is started

        self.reset()

    def reset(self):
        """
        Revert the OU process back to its starting state.

        If x_start was specified, the walk restarts from it; otherwise it restarts from zeros shaped like `mu`.

        Args: None
        Returns: None
        """
        # The starting point lives on the instance; a bare `x_start` is undefined in this scope.
        if self.x_start is not None:
            self.prev_x = self.x_start
        else:
            self.prev_x = np.zeros_like(self.mu)

    def generate_noise(self):
        """
        Generate the next value in the random walk, which is then used as noise added to the action during training to encourage exploration.

        Formula:
            X_next = X_prev + theta * (mu - X_prev) * dt + sigma * sqrt(dt) * n, where 'n' is a random number sampled from a normal distribution with mean 0 and standard deviation 1

        Args: None
        Returns:
            The next value of the process. It is also stored as the previous value for the following call.
        """
        # np.shape accepts plain floats as well as arrays, unlike the `.shape` attribute.
        gaussian = np.random.normal(loc=0.0, scale=1.0, size=np.shape(self.mu))
        drift = self.theta * (self.mu - self.prev_x) * self.dt
        diffusion = self.sigma * np.sqrt(self.dt) * gaussian
        x = self.prev_x + drift + diffusion

        self.prev_x = x
        return x

Classes

class OrnsteinUhlenbeckNoise (mu, sigma=0.2, theta=0.15, dt=0.01, x_start=None)

Class for the OU Process used for generating noise to encourage the agent to explore the environment

Based on: 1. https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab (Formula to be implemented – Matlab) 2. https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py (Implementation used in OpenAI Baselines)

Initializes the Ornstein-Uhlenbeck Noise Process

Args

mu : float: mean value around which the random values are generated
sigma : float, optional: amount of noise to be applied to the process. Defaults to 0.2.
theta : float, optional: amount of frictional force to be applied. Defaults to 0.15.
dt : float, optional: time step used in the update formula. Defaults to 0.01.
x_start : int, optional: the point from where the random walk is started. Defaults to None.

Expand source code

class OrnsteinUhlenbeckNoise():
    """
    Ornstein-Uhlenbeck (OU) process used to generate noise that encourages the agent to explore the environment.

    Based on:
    1. https://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab (Formula to be implemented -- Matlab)
    2. https://github.com/openai/baselines/blob/master/baselines/ddpg/noise.py (Implementation used in OpenAI Baselines)
    """
    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2, x_start=None):
        """Initializes the Ornstein-Uhlenbeck Noise Process

        Args:
            mu (float or np.ndarray): mean value around which the random values are generated.
                Its shape determines the shape of every generated sample.
            sigma (float, optional): amount of noise to be applied to the process. Defaults to 0.2.
            theta (float, optional): amount of frictional force to be applied. Defaults to 0.15.
            dt (float, optional): time step used in the update formula. Defaults to 1e-2.
            x_start (optional): the point from where the random walk is started.
                Defaults to None, in which case the walk starts from zeros shaped like `mu`.
        """
        self.mu = mu  # mean value around which the random values are generated
        self.sigma = sigma  # amount of noise to be applied to the process
        self.theta = theta  # amount of frictional force to be applied
        self.dt = dt  # time step of the discretised process
        self.x_start = x_start  # the point from where the random walk is started

        self.reset()

    def reset(self):
        """
        Revert the OU process back to its starting state.

        If x_start was specified, the walk restarts from it; otherwise it restarts from zeros shaped like `mu`.

        Args: None
        Returns: None
        """
        # The starting point lives on the instance; a bare `x_start` is undefined in this scope.
        if self.x_start is not None:
            self.prev_x = self.x_start
        else:
            self.prev_x = np.zeros_like(self.mu)

    def generate_noise(self):
        """
        Generate the next value in the random walk, which is then used as noise added to the action during training to encourage exploration.

        Formula:
            X_next = X_prev + theta * (mu - X_prev) * dt + sigma * sqrt(dt) * n, where 'n' is a random number sampled from a normal distribution with mean 0 and standard deviation 1

        Args: None
        Returns:
            The next value of the process. It is also stored as the previous value for the following call.
        """
        # np.shape accepts plain floats as well as arrays, unlike the `.shape` attribute.
        gaussian = np.random.normal(loc=0.0, scale=1.0, size=np.shape(self.mu))
        drift = self.theta * (self.mu - self.prev_x) * self.dt
        diffusion = self.sigma * np.sqrt(self.dt) * gaussian
        x = self.prev_x + drift + diffusion

        self.prev_x = x
        return x

Methods

def generate_noise(self)

Generate the next value in the random walk, which is then used as noise added to the action during training to encourage exploration.

Formula

X_next = X_prev + theta * (mu - X_prev) * dt + sigma * sqrt(dt) * n, where 'n' is a random number sampled from a normal distribution with mean 0 and standard deviation 1. Args: None. Returns: the next value of the process, which is also stored as the previous value for the following call.

Expand source code

def generate_noise(self):
    """
    Generate the next value in the random walk, which is then used as noise added to the action during training to encourage exploration.

    Formula:
        X_next = X_prev + theta * (mu - X_prev) * dt + sigma * sqrt(dt) * n, where 'n' is a random number sampled from a normal distribution with mean 0 and standard deviation 1

    Args: None
    Returns:
        The next value of the process. It is also stored in `self.prev_x` for the following call.
    """
    # np.shape accepts plain floats as well as arrays, unlike the `.shape` attribute.
    gaussian = np.random.normal(loc=0.0, scale=1.0, size=np.shape(self.mu))
    drift = self.theta * (self.mu - self.prev_x) * self.dt
    diffusion = self.sigma * np.sqrt(self.dt) * gaussian
    x = self.prev_x + drift + diffusion

    self.prev_x = x
    return x

def reset(self)

Revert the OU process back to default settings. If x_start is specified, use it, else, start from zero. Args: None

Returns: None

Expand source code

def reset(self):
    """
    Revert the OU process back to its starting state.

    If x_start was specified, the walk restarts from it; otherwise it restarts from zeros shaped like `mu`.

    Args: None
    Returns: None
    """
    # The starting point lives on the instance; a bare `x_start` is undefined in this scope.
    if self.x_start is not None:
        self.prev_x = self.x_start
    else:
        self.prev_x = np.zeros_like(self.mu)