Module AssetAllocator.algorithms.NAF.network
Expand source code
import torch
import torch.nn as nn
from torch.distributions import MultivariateNormal
class NAFNetwork(nn.Module):
    """
    Neural Network Approximator for NAF.
    Original paper can be found at https://arxiv.org/abs/1906.04594
    This implementation was adapted from https://github.com/BY571/Normalized-Advantage-Function-NAF-
    """

    def __init__(self, state_size, action_size, layer_size, seed, device):
        """
        Initializes the NAF network.

        Params
        ======
            state_size: state space size
            action_size: action space size
            layer_size: number of neurons in each hidden layer
            seed: random seed
            device: one of 'cuda' or 'cpu'
        """
        super(NAFNetwork, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.input_shape = state_size
        self.action_size = action_size
        self.device = device

        # shared trunk
        self.head_1 = nn.Linear(self.input_shape, layer_size)
        self.bn1 = nn.BatchNorm1d(layer_size)  # defined but not applied in forward
        self.ff_1 = nn.Linear(layer_size, layer_size)
        self.bn2 = nn.BatchNorm1d(layer_size)  # defined but not applied in forward
        # output heads: action mean mu, state value V, and the action_size*(action_size+1)/2
        # entries of the lower-triangular matrix L used to build the advantage term
        self.action_values = nn.Linear(layer_size, action_size)
        self.value = nn.Linear(layer_size, 1)
        self.matrix_entries = nn.Linear(layer_size, int(self.action_size * (self.action_size + 1) / 2))
    def forward(self, input_, action=None):
        """
        Computes the forward pass of the NAF network.

        Params
        ======
            input_ : state tensor of shape (batch_size, state_size)
            action : optional action tensor; when given, the state-action value Q is also computed

        Returns a tuple (action, Q, V).
        """
        x = torch.relu(self.head_1(input_))
        x = torch.relu(self.ff_1(x))
        action_value = torch.tanh(self.action_values(x))   # action mean mu, in [-1, 1]
        entries = torch.tanh(self.matrix_entries(x))
        V = self.value(x)
        action_value = action_value.unsqueeze(-1)

        # create lower-triangular matrix L
        L = torch.zeros((input_.shape[0], self.action_size, self.action_size)).to(self.device)
        # get lower-triangular indices
        tril_indices = torch.tril_indices(row=self.action_size, col=self.action_size, offset=0)
        # fill the matrix with the predicted entries
        L[:, tril_indices[0], tril_indices[1]] = entries
        # exponentiate the diagonal in place so it is strictly positive
        L.diagonal(dim1=1, dim2=2).exp_()
        # calculate the state-dependent, positive-definite square matrix P = L L^T
        P = torch.matmul(L, L.transpose(2, 1))
        Q = None
        if action is not None:
            # calculate the advantage A(s, a) = -1/2 (a - mu)^T P (a - mu)
            A = (-0.5 * torch.matmul(torch.matmul((action.unsqueeze(-1) - action_value).transpose(2, 1), P),
                                     (action.unsqueeze(-1) - action_value))).squeeze(-1)
            Q = A + V

        # sample an exploratory action around the mean mu, with covariance P^-1
        dist = MultivariateNormal(action_value.squeeze(-1), torch.inverse(P))
        action = dist.sample()
        action = torch.clamp(action, min=-1, max=1)

        return action, Q, V
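The core of forward is the construction of the state-dependent matrix P = L L^T from the action_size * (action_size + 1) / 2 outputs of matrix_entries. Because P is positive definite, the advantage A(s, a) = -1/2 (a - mu)^T P (a - mu) is a concave quadratic that peaks at mu, so the greedy action is simply the network's mean output. The standalone sketch below, with hypothetical sizes and independent of the module, reproduces just this construction to show the shapes involved:

import torch

batch_size, action_size = 4, 3                       # hypothetical sizes
entries = torch.randn(batch_size, action_size * (action_size + 1) // 2)
mu = torch.randn(batch_size, action_size, 1)         # stands in for action_value
a = torch.randn(batch_size, action_size, 1)          # stands in for a given action

# fill a lower-triangular matrix per batch element, as in NAFNetwork.forward
L = torch.zeros(batch_size, action_size, action_size)
tril = torch.tril_indices(action_size, action_size)
L[:, tril[0], tril[1]] = entries
L.diagonal(dim1=1, dim2=2).exp_()                    # strictly positive diagonal

P = torch.matmul(L, L.transpose(2, 1))               # positive definite by construction
A = -0.5 * (a - mu).transpose(2, 1) @ P @ (a - mu)   # quadratic advantage, always <= 0
print(P.shape, A.shape)                              # torch.Size([4, 3, 3]) torch.Size([4, 1, 1])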
Classes
class NAFNetwork (state_size, action_size, layer_size, seed, device)
Neural Network Approximator for NAF.
Original paper can be found at https://arxiv.org/abs/1906.04594
This implementation was adapted from https://github.com/BY571/Normalized-Advantage-Function-NAF-
Initializes the NAF network.
Params
    state_size: state space size
    action_size: action space size
    layer_size: number of neurons in each hidden layer
    seed: random seed
    device: one of 'cuda' or 'cpu'
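A minimal instantiation sketch; the state, action, and layer sizes below are assumed values for illustration, not defaults of the package:

import torch
from AssetAllocator.algorithms.NAF.network import NAFNetwork

device = "cuda" if torch.cuda.is_available() else "cpu"
# state_size=10, action_size=3, layer_size=256 are hypothetical
net = NAFNetwork(state_size=10, action_size=3, layer_size=256, seed=0, device=device).to(device)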
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, input_, action=None)
Computes the forward pass of the NAF network.
Params
    input_ : state tensor
    action : optional action tensor; when given, the state-action value Q is also computed
Returns a tuple (action, Q, V).
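A hedged usage sketch of the two call patterns; the network sizes, batch size, and action ranges below are assumptions for illustration:

import torch
from AssetAllocator.algorithms.NAF.network import NAFNetwork

device = "cpu"                                 # assumed for this sketch
net = NAFNetwork(state_size=10, action_size=3, layer_size=256, seed=0, device=device)

states = torch.randn(32, 10)                   # hypothetical batch of states
actions = torch.rand(32, 3) * 2 - 1            # hypothetical actions in [-1, 1]

# acting: sample an exploratory action around the mean mu (Q is None here)
sampled_action, _, _ = net(states)

# training: evaluate Q(s, a) and V(s) for a batch of stored transitions
_, Q, V = net(states, actions)
print(sampled_action.shape, Q.shape, V.shape)  # (32, 3), (32, 1), (32, 1)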