Module AssetAllocator.algorithms.DDPG.Network

Defines the Actor and Critic network architectures for the DDPG agent

Source code
"""
Defines the Actor and Critic network architectures for the DDPG agent
"""
import os
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


def get_fan_in_init_bound(layer):
    """
    Function to compute the initialisation bound as 1/sqrt(f), where f is the fan-in value (i.e., number of inputs) of the given layer
    Parameters
    ---
    layer: torch.nn.Module
        The layer of the network to be initialised
    Returns
    ---
    the fan-in based upper bound to be used for initialisation, such that the lower bound is the negative of this value
    """
    fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(layer.weight)
    # fan_in = layer.weight.size(1) ## a potential solution to computing fan-in when using linear layers as the shape of the weight of the linear layer is [fan_out, fan_in]
    return 1/math.sqrt(fan_in)


def apply_uniform_init(layer, bound=None):
    """
    Function to initialise the specified layer using either the provided bound value or the fan-in based bound (suggested in the DDPG paper for hidden layers)
    Parameters
    ---
    layer: torch.nn.Module
        The layer of the network to be initialised
    bound: float or None
        Specifies the value for the upper bound of the initialisation, such that the lower bound is the negative of this value. If None, then use fan-in based initialisation
    Returns
    ---
    none
    """
    if bound is None:
        bound = get_fan_in_init_bound(layer)
    nn.init.uniform_(layer.weight, a=-bound, b=bound)  # initialise the weights
    nn.init.uniform_(layer.bias, a=-bound, b=bound)  # initialise the biases


class Actor(nn.Module):
    """
    This is the actor network for the DDPG Agent.

    Original paper can be found at https://arxiv.org/abs/1509.02971

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

    """

    def __init__(self, state_dim, action_dim, max_action, lr=1e-4):
        """Initialized the DDPG Actor Network

        Args:
            state_dim (int): State space dimension
            action_dim (int): Action space dimension
            lr (float, optional): Learning rate. Defaults to 0.0001.
            max_action (int, optional): Action scaling value. Defaults to 1.
        """    
        super(Actor, self).__init__()

        self.max_action = max_action

        self.dense1 = nn.Linear(state_dim, 400)
        apply_uniform_init(self.dense1)

        self.dense2 = nn.Linear(400, 300)
        apply_uniform_init(self.dense2)


        self.dense3 = nn.Linear(300, action_dim)
        apply_uniform_init(self.dense3, bound=3e-3)  # final-layer bound of 3e-3, as suggested in the DDPG paper

        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, state):
        """Forward pass

        Args:
            state (array_like): Current environment state

        Returns:
            action: Agent's Action Values
        """        
        x = F.relu(self.dense1(state))
        x = F.relu(self.dense2(x))
        x = self.dense3(x)
        # scale the raw outputs by max_action and apply a softmax so the
        # action vector is non-negative and sums to 1 (portfolio weights),
        # rather than the tanh squashing to [-1, +1] used in the original
        # DDPG paper
        x = F.softmax(self.max_action * x, dim=1)
        return x

    def save_model(self, filename):
        """Save model weights

        Args:
            filename (string): File Path to save model
        """    
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        """Load model weights

        Args:
            filename (string): File Path to model weights
        """    
        self.load_state_dict(torch.load(filename))


class Critic(nn.Module):
    """
    This is the critic network for the DDPG Agent.

    Original paper can be found at https://arxiv.org/abs/1509.02971

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

    """

    def __init__(self, state_dim, action_dim, lr=1e-3):
        """Initializes the DDPG Critic Network

        Args:
            state_dim (int): State space dimension
            action_dim (int): Action space dimension
            lr (float, optional): Learning rate. Defaults to 0.001.
        """   
        super(Critic, self).__init__()

        # the state feeds the first layer on its own; the action is concatenated
        # with the output of the first hidden layer before the second layer
        self.dense1 = nn.Linear(state_dim, 400)
        apply_uniform_init(self.dense1)

        #self.bn1 = nn.BatchNorm1d(400)

        self.dense2 = nn.Linear(400 + action_dim, 300)
        apply_uniform_init(self.dense2)

        #self.bn2 = nn.BatchNorm1d(300)

        self.dense3 = nn.Linear(300, 1)
        apply_uniform_init(self.dense3, bound=3e-4)  # small uniform bound for the output layer

        self.optimizer = optim.Adam(
            self.parameters(), lr=lr, weight_decay=1e-2)

    def forward(self, state, action):
        """Forward pass

        Args:
            state (array_like): Current environment state
            action (array_like): Current agent's action

        Returns:
            out: State-Action Values
        """
        x = F.relu(self.dense1(state))

        x = torch.cat([x, action], dim=1)
        x = F.relu(self.dense2(x))
        # the computed Q-value for the given state-action pair
        x = self.dense3(x)

        return x

    def save_model(self, filename):
        """Save model weights

        Args:
            filename (string): File Path to save model
        """    
        torch.save(self.state_dict(), filename)

    def load_model(self, filename):
        """Load model weights

        Args:
            filename (string): File Path to model weights
        """    
        self.load_state_dict(torch.load(filename))
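The snippet below is a minimal usage sketch of the two networks together; the state dimension, action dimension and batch size are illustrative assumptions, not values defined by this module.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor, Critic

state_dim, action_dim = 10, 4          # assumed sizes for illustration
actor = Actor(state_dim, action_dim, max_action=1)
critic = Critic(state_dim, action_dim)

state = torch.randn(32, state_dim)     # a batch of 32 states
action = actor(state)                  # softmax output: each row sums to 1
q_value = critic(state, action)        # shape [32, 1]

print(action.sum(dim=1))               # ~1.0 for every row
print(q_value.shape)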

Functions

def apply_uniform_init(layer, bound=None)

Function to initialise the specified layer using either the provided bound value or the fan-in based bound (suggested in the DDPG paper for hidden layers)

Parameters

layer : torch.nn.Module
The layer of the network to be initialised
bound : float or None
Specifies the value for the upper bound of the initialisation, such that the lower bound is the negative of this value. If None, then use fan-in based initialisation

Returns

None
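A small sketch of the default fan-in behaviour (the layer sizes are arbitrary):

import math
import torch.nn as nn
from AssetAllocator.algorithms.DDPG.Network import apply_uniform_init

layer = nn.Linear(400, 300)            # arbitrary hidden-layer sizes
apply_uniform_init(layer)              # no bound given, so the fan-in rule applies

bound = 1 / math.sqrt(400)             # fan-in of a Linear layer is in_features
assert layer.weight.abs().max() <= bound
assert layer.bias.abs().max() <= bound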
 
def get_fan_in_init_bound(layer)

Function to compute the initialisation bound as 1/sqrt(f), where f is the fan-in value (i.e., number of inputs) of the given layer

Parameters

layer : torch.nn.Module
The layer of the network to be initialised

Returns

The fan-in based upper bound to be used for initialisation, such that the lower bound is the negative of this value
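For a linear layer the fan-in is simply its number of input features, as the commented-out alternative in the source notes; a quick illustration with arbitrary sizes:

import math
import torch.nn as nn
from AssetAllocator.algorithms.DDPG.Network import get_fan_in_init_bound

layer = nn.Linear(400, 300)                    # arbitrary sizes
bound = get_fan_in_init_bound(layer)

# For a Linear layer the fan-in equals in_features (the weight shape is
# [fan_out, fan_in]), so the bound is 1/sqrt(400).
assert math.isclose(bound, 1 / math.sqrt(layer.weight.size(1)))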
 

Classes

class Actor (state_dim, action_dim, max_action, lr=0.0001)

This is the actor network for the DDPG Agent.

Original paper can be found at https://arxiv.org/abs/1509.02971

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

Initializes the DDPG Actor Network

Args

state_dim : int
State space dimension
action_dim : int
Action space dimension
max_action : int
Action scaling value
lr : float, optional
Learning rate. Defaults to 0.0001.
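The sketch below shows how the network's built-in optimizer might be used in a standard DDPG-style actor update; the critic, the batch of states and the minus-Q objective follow the usual DDPG recipe and are illustrative assumptions, not behaviour prescribed by this class.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor, Critic

state_dim, action_dim = 10, 4                 # illustrative sizes
actor = Actor(state_dim, action_dim, max_action=1)
critic = Critic(state_dim, action_dim)

states = torch.randn(64, state_dim)           # a sampled minibatch of states

# Standard DDPG actor objective: maximise the critic's Q-value of the
# actor's own actions, i.e. minimise -Q(s, pi(s)).
actor_loss = -critic(states, actor(states)).mean()

actor.optimizer.zero_grad()
actor_loss.backward()
actor.optimizer.step()                        # only the actor's parameters are stepped here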

Ancestors

  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, state)

Forward pass

Args

state : array_like
Current environment state

Returns

action
Agent's Action Values
def load_model(self, filename)

Load model weights

Args

filename : string
File Path to model weights
def save_model(self, filename)

Save model weights

Args

filename : string
File Path to save model
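A sketch of a save/load round trip; the file path and network sizes are placeholders.

import torch
from AssetAllocator.algorithms.DDPG.Network import Actor

actor = Actor(10, 4, max_action=1)            # illustrative sizes
actor.save_model("actor_checkpoint.pth")      # placeholder path

restored = Actor(10, 4, max_action=1)         # must be built with the same dimensions
restored.load_model("actor_checkpoint.pth")

# the restored weights match the originals
for p, q in zip(actor.parameters(), restored.parameters()):
    assert torch.equal(p, q)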
class Critic (state_dim, action_dim, lr=0.001)

This is the critic network for the DDPG Agent.

Original paper can be found at https://arxiv.org/abs/1509.02971

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/ddpg

Initializes the DDPG Critic Network

Args

state_dim : int
State space dimension
action_dim : int
Action space dimension
lr : float, optional
Learning rate. Defaults to 0.001.
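The sketch below shows a standard DDPG-style critic update using the network's built-in optimizer; the minibatch and the Bellman targets (which a full agent would compute from its target networks) are stand-ins for illustration.

import torch
import torch.nn.functional as F
from AssetAllocator.algorithms.DDPG.Network import Critic

state_dim, action_dim = 10, 4                     # illustrative sizes
critic = Critic(state_dim, action_dim)

states = torch.randn(64, state_dim)               # sampled minibatch of states
actions = torch.rand(64, action_dim)              # actions taken in those states (stand-in)
target_q = torch.randn(64, 1)                     # stand-in for r + gamma * Q'(s', pi'(s'))

# Standard DDPG critic objective: regress Q(s, a) towards the Bellman target.
critic_loss = F.mse_loss(critic(states, actions), target_q)

critic.optimizer.zero_grad()
critic_loss.backward()
critic.optimizer.step()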

Ancestors

  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, state, action)

Forward pass

Args

state : array_like
Current environment state
action : array_like
Current agent's action

Returns

out
State-Action Values
def load_model(self, filename)

Load model weights

Args

filename : string
File Path to model weights
def save_model(self, filename)

Save model weights

Args

filename : string
File Path to save model