Module AssetAllocator.algorithms.DDPG.Network

Defines the Actor and Critic network architectures for the DDPG agent

Source code
"""
Defines the Actor and Critic network architectures for the DDPG agent
"""
import os
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


def get_fan_in_init_bound(layer):
    """
    Function to compute the initialisation bound as 1/sqrt(f), where f is the fan-in value (i.e., number of inputs) of the given layer
    Parameters
    ---
    layer: torch.nn.Module
        The layer of the network to be initialised
    Returns
    ---
    the fan-in based upper bound to be used for initialisation, such that the lower bound is the negative of this value
    """
    fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(layer.weight)
    # fan_in = layer.weight.size(1) ## a potential solution to computing fan-in when using linear layers as the shape of the weight of the linear layer is [fan_out, fan_in]
    return 1/math.sqrt(fan_in)


def apply_uniform_init(layer, bound=None):
    """
    Function to initialise the specified layer using either the provided bound value or the fan-in based bound (suggested in the DDPG paper for hidden layers)
    Parameters
    ---
    layer: torch.nn.Module
        The layer of the network to be initialised
    bound: float or None
        Specifies the value for the upper bound of the initialisation, such that the lower bound is the negative of this value. If None, then use fan-in based initialisation
    Returns
    ---
    none
    """
    if bound is None:
        bound = get_fan_in_init_bound(layer)
    nn.init.uniform_(layer.weight, a=-bound, b=bound)  # initialise the weights
    nn.init.uniform_(layer.bias, a=-bound, b=bound)  # initialise the biases


class Actor(nn.Module):
    """
    This is the actor network for the DDPG Agent.

    Original paper can be found at https://arxiv.org/abs/1509.02971

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

    """

    def __init__(self, state_dim, action_dim, max_action, lr=1e-4):
        """Initialized the DDPG Actor Network

        Args:
            state_dim (int): State space dimension
            action_dim (int): Action space dimension
            lr (float, optional): Learning rate. Defaults to 0.0001.
            max_action (int, optional): Action scaling value. Defaults to 1.
        """    
        super(Actor, self).__init__()

        self.max_action = max_action

        self.dense1 = nn.Linear(state_dim, 400)
        apply_uniform_init(self.dense1)

        self.dense2 = nn.Linear(400, 300)
        apply_uniform_init(self.dense2)


        self.dense3 = nn.Linear(300, action_dim)
        apply_uniform_init(self.dense3, bound=3e-3)  # final-layer bound of 3e-3, as suggested in the DDPG paper

        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, state):
        """Forward pass

        Args:
            state (array_like): Current environment state

        Returns:
            action: Agent's Action Values
        """        
        x = F.relu(self.dense1(state))
        x = F.relu(self.dense2(x))
        x = self.dense3(x)
        # scale the raw outputs by max_action and apply a softmax so the
        # action vector is non-negative and sums to 1 (portfolio weights),
        # rather than the tanh squashing to [-1, +1] used in the original
        # DDPG paper
        x = F.softmax(self.max_action * x, dim=1)
        return x

    def save_model(self, filename):
        """Save model weights

        Args:
            filename (string): File Path to save model
        """    
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        """Load model weights

        Args:
            filename (string): File Path to model weights
        """    
        self.load_state_dict(torch.load(filename))


class Critic(nn.Module):
    """
    This is the critic network for the DDPG Agent.

    Original paper can be found at https://arxiv.org/abs/1509.02971

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

    """

    def __init__(self, state_dim, action_dim, lr=1e-3):
        """Initializes the DDPG Critic Network

        Args:
            state_dim (int): State space dimension
            action_dim (int): Action space dimension
            lr (float, optional): Learning rate. Defaults to 0.001.
        """   
        super(Critic, self).__init__()

        # the state feeds the first layer on its own; the action is concatenated
        # with the output of the first hidden layer before the second layer
        self.dense1 = nn.Linear(state_dim, 400)
        apply_uniform_init(self.dense1)

        #self.bn1 = nn.BatchNorm1d(400)

        self.dense2 = nn.Linear(400 + action_dim, 300)
        apply_uniform_init(self.dense2)

        #self.bn2 = nn.BatchNorm1d(300)

        self.dense3 = nn.Linear(300, 1)
        apply_uniform_init(self.dense3, bound=3e-4)  # small uniform bound for the output layer

        self.optimizer = optim.Adam(
            self.parameters(), lr=lr, weight_decay=1e-2)

    def forward(self, state, action):
        """Forward pass

        Args:
            state (array_like): Current environment state
            action (array_like): Current agent's action

        Returns:
            out: State-Action Values
        """
        x = F.relu(self.dense1(state))

        x = torch.cat([x, action], dim=1)
        x = F.relu(self.dense2(x))
        # the computed Q-value for the given state-action pair
        x = self.dense3(x)

        return x

    def save_model(self, filename):
        """Save model weights

        Args:
            filename (string): File Path to save model
        """    
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        """Load model weights

        Args:
            filename (string): File Path to model weights
        """    
        self.load_state_dict(torch.load(filename))
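The snippet below is a minimal usage sketch of the two networks together; the state dimension, action dimension and batch size are illustrative assumptions, not values defined by this module.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor, Critic

state_dim, action_dim = 10, 4          # assumed sizes for illustration
actor = Actor(state_dim, action_dim, max_action=1)
critic = Critic(state_dim, action_dim)

state = torch.randn(32, state_dim)     # a batch of 32 states
action = actor(state)                  # softmax output: each row sums to 1
q_value = critic(state, action)        # shape [32, 1]

print(action.sum(dim=1))               # ~1.0 for every row
print(q_value.shape)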

Functions

def apply_uniform_init(layer, bound=None)

Function to initialise the specified layer using either the provided bound value or the fan-in based bound (suggested in the DDPG paper for hidden layers)

Parameters

layer : torch.nn.Module
The layer of the network to be initialised
bound : float or None
Specifies the value for the upper bound of the initialisation, such that the lower bound is the negative of this value. If None, then use fan-in based initialisation

Returns

None
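A small sketch of the default fan-in behaviour (the layer sizes are arbitrary):

import math
import torch.nn as nn
from AssetAllocator.algorithms.DDPG.Network import apply_uniform_init

layer = nn.Linear(400, 300)            # arbitrary hidden-layer sizes
apply_uniform_init(layer)              # no bound given, so the fan-in rule applies

bound = 1 / math.sqrt(400)             # fan-in of a Linear layer is in_features
assert layer.weight.abs().max() <= bound
assert layer.bias.abs().max() <= bound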
 
def get_fan_in_init_bound(layer)

Function to compute the initialisation bound as 1/sqrt(f), where f is the fan-in value (i.e., number of inputs) of the given layer

Parameters

layer : torch.nn.Module
The layer of the network to be initialised

Returns

The fan-in based upper bound to be used for initialisation, such that the lower bound is the negative of this value
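For a linear layer the fan-in is simply its number of input features, as the commented-out alternative in the source notes; a quick illustration with arbitrary sizes:

import math
import torch.nn as nn
from AssetAllocator.algorithms.DDPG.Network import get_fan_in_init_bound

layer = nn.Linear(400, 300)                    # arbitrary sizes
bound = get_fan_in_init_bound(layer)

# For a Linear layer the fan-in equals in_features (the weight shape is
# [fan_out, fan_in]), so the bound is 1/sqrt(400).
assert math.isclose(bound, 1 / math.sqrt(layer.weight.size(1)))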
 

Classes

class Actor (state_dim, action_dim, max_action, lr=0.0001)

This is the actor network for the DDPG Agent.

Original paper can be found at https://arxiv.org/abs/1509.02971

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

Initializes the DDPG Actor Network

Args

state_dim : int
State space dimension
action_dim : int
Action space dimension
max_action : int
Action scaling value
lr : float, optional
Learning rate. Defaults to 0.0001.
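The sketch below shows how the network's built-in optimizer might be used in a standard DDPG-style actor update; the critic, the batch of states and the minus-Q objective follow the usual DDPG recipe and are illustrative assumptions, not behaviour prescribed by this class.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor, Critic

state_dim, action_dim = 10, 4                 # illustrative sizes
actor = Actor(state_dim, action_dim, max_action=1)
critic = Critic(state_dim, action_dim)

states = torch.randn(64, state_dim)           # a sampled minibatch of states

# Standard DDPG actor objective: maximise the critic's Q-value of the
# actor's own actions, i.e. minimise -Q(s, pi(s)).
actor_loss = -critic(states, actor(states)).mean()

actor.optimizer.zero_grad()
actor_loss.backward()
actor.optimizer.step()                        # only the actor's parameters are stepped here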

Ancestors

  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, state)

Forward pass

Args

state : array_like
Current environment state

Returns

action
Agent's Action Values
def load_model(self, filename)

Load model weights

Args

filename : string
File Path to model weights
def save_model(self, filename)

Save model weights

Args

filename : string
File Path to save model
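A sketch of a save/load round trip; the file path and network sizes are placeholders.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor

actor = Actor(10, 4, max_action=1)            # illustrative sizes
actor.save_model("actor_checkpoint.pth")      # placeholder path

restored = Actor(10, 4, max_action=1)         # must be built with the same dimensions
restored.load_model("actor_checkpoint.pth")

# the restored weights match the originals
for p, q in zip(actor.parameters(), restored.parameters()):
    assert torch.equal(p, q)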
class Critic (state_dim, action_dim, lr=0.001)

This is the critic network for the DDPG Agent.

Original paper can be found at https://arxiv.org/abs/1509.02971

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

Initializes the DDPG Critic Network

Args

state_dim : int
State space dimension
action_dim : int
Action space dimension
lr : float, optional
Learning rate. Defaults to 0.001.
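The sketch below shows a standard DDPG-style critic update using the network's built-in optimizer; the minibatch and the Bellman targets (which a full agent would compute from its target networks) are stand-ins for illustration.

import torch
import torch.nn.functional as F
from AssetAllocator.algorithms.DDPG.Network import Critic

state_dim, action_dim = 10, 4                     # illustrative sizes
critic = Critic(state_dim, action_dim)

states = torch.randn(64, state_dim)               # sampled minibatch of states
actions = torch.rand(64, action_dim)              # actions taken in those states (stand-in)
target_q = torch.randn(64, 1)                     # stand-in for r + gamma * Q'(s', pi'(s'))

# Standard DDPG critic objective: regress Q(s, a) towards the Bellman target.
critic_loss = F.mse_loss(critic(states, actions), target_q)

critic.optimizer.zero_grad()
critic_loss.backward()
critic.optimizer.step()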

Ancestors

  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, state, action)

Forward pass

Args

state : array_like
Current environment state
action : array_like
Current agent's action

Returns

out
State-Action Values
def load_model(self, filename)

Load model weights

Args

filename : string
File Path to model weights
def save_model(self, filename)

Save model weights

Args

filename : string
File Path to save model