Module AssetAllocator.algorithms.TRPO.policy
Expand source code
import torch
import torch.autograd as autograd
from torch.autograd import Variable
import torch.nn as nn
import numpy as np

torch.set_default_tensor_type('torch.DoubleTensor')


class Policy(nn.Module):
    def __init__(self, num_inputs, num_outputs, hidden_size, device):
        super(Policy, self).__init__()
        self.inputLayer = nn.Linear(num_inputs, hidden_size)
        self.hiddenLayer = nn.Linear(hidden_size, hidden_size)
        self.hiddenLayer2 = nn.Linear(hidden_size, hidden_size)
        self.outputLayer = nn.Linear(hidden_size, num_outputs)
        # State-independent log standard deviation, learned as a free parameter
        self.logStd = nn.Parameter(torch.zeros(1, num_outputs))
        self.device = device

    def forward(self, x):
        """
        Parameters:
            x (torch.Tensor): N_sample x N_state | batch of states

        Returns:
            torch.Tensor: N_sample x N_action | mean of the action
            torch.Tensor: N_sample x N_action | log(std) of the action
            torch.Tensor: 1 x N_action | std of the action (broadcasts against the mean)
        """
        x = x.double().to(self.device)
        x = torch.tanh(self.inputLayer(x))
        x = torch.tanh(self.hiddenLayer(x))
        x = torch.tanh(self.hiddenLayer2(x))
        action_mean = self.outputLayer(x)
        action_logStd = self.logStd.expand_as(action_mean)
        action_std = torch.exp(self.logStd)
        return action_mean, action_logStd, action_std

    def getLogProbabilityDensity(self, states, actions):
        """
        Parameters:
            states (torch.Tensor): N_sample x N_state | the states of the samples
            actions (torch.Tensor): N_sample x N_action | the actions taken for these samples

        Returns:
            torch.Tensor: N_sample | log probability of each action under the policy's
                Gaussian distribution, summed over action dimensions
        """
        action_mean, logStd, action_std = self.forward(states.to(self.device))
        var = torch.exp(logStd).pow(2)
        # Per-dimension Gaussian log-density: -(a - mu)^2 / (2 sigma^2) - 0.5*log(2*pi) - log(sigma)
        logProbablitiesDensity_ = -(actions.reshape(action_mean.shape) - action_mean).pow(2) / (
            2 * var) - 0.5 * np.log(2 * np.pi) - logStd
        return logProbablitiesDensity_.sum(1)

    def meanKlDivergence(self, states, actions, logProbablityOld):
        """
        Parameters:
            states (torch.Tensor): N_sample x N_state | the states of the samples
            actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
            logProbablityOld (torch.Tensor): N_sample | log probability of the actions under
                the old policy; this should be detached from the gradient.

        Returns:
            torch.Tensor: scalar | the mean KL-divergence over the batch
        """
        logProbabilityNew = self.getLogProbabilityDensity(states.to(self.device),
                                                          actions.to(self.device))
        return (torch.exp(logProbablityOld)
                * (logProbablityOld - logProbabilityNew)).mean()

    def get_action(self, state):
        """
        Parameters:
            state (numpy.array): N_state

        Returns:
            numpy.array: 1 x N_action | sampled action, softmax-normalised so the
                components are positive and sum to one
        """
        state = torch.from_numpy(state).unsqueeze(0).to(self.device)
        action_mean, action_log_std, action_std = self.forward(state)
        action = torch.normal(action_mean, action_std)
        # Normalise the sampled action so its components are positive and sum to one
        action = torch.nn.Softmax(dim=1)(action)
        return action.cpu().detach().numpy()

    def get_mean_action(self, state):
        """
        Parameters:
            state (numpy.array): N_state

        Returns:
            numpy.array: 1 x N_action | mean (deterministic) action
        """
        state = torch.from_numpy(state).unsqueeze(0).to(self.device)
        action_mean, action_log_std, action_std = self.forward(state)
        return action_mean.cpu().detach().numpy()
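Example (not part of the module source): a minimal usage sketch. The state size, number of assets, hidden width, and CPU device below are made-up values chosen purely for illustration.

import numpy as np
import torch

# Hypothetical sizes: a 10-dimensional state, 4 assets to allocate across, hidden width 64
device = torch.device('cpu')
policy = Policy(num_inputs=10, num_outputs=4, hidden_size=64, device=device)

state = np.random.randn(10)             # one observation
weights = policy.get_action(state)      # stochastic, softmax-normalised action, shape (1, 4)
greedy = policy.get_mean_action(state)  # deterministic mean action, shape (1, 4)
print(weights.sum())                    # the sampled weights sum to 1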
Classes
class Policy (num_inputs, num_outputs, hidden_size, device)
-
Gaussian policy network used by the TRPO agent. Three fully connected layers with tanh activations map a state to the mean of a diagonal Gaussian over actions, while the log standard deviation is a state-independent learned parameter (logStd). get_action samples from this Gaussian and softmax-normalises the sample so the components are positive and sum to one; get_mean_action returns the deterministic mean instead.
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, x)
-
Parameters: x (torch.Tensor): N_sample x N_state | batch of states
Returns:
torch.Tensor: N_sample x N_action | mean of the action
torch.Tensor: N_sample x N_action | log(std) of the action
torch.Tensor: 1 x N_action | std of the action (broadcasts against the mean)
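A short illustration of the returned shapes; the batch size and dimensions are hypothetical, as in the earlier sketch.

import torch

device = torch.device('cpu')
policy = Policy(num_inputs=10, num_outputs=4, hidden_size=64, device=device)

states = torch.randn(32, 10)                 # batch of 32 states
mean, log_std, std = policy(states)          # equivalent to policy.forward(states)
print(mean.shape, log_std.shape, std.shape)  # (32, 4), (32, 4), (1, 4)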
def getLogProbabilityDensity(self, states, actions)
-
Parameters:
states (torch.Tensor): N_sample x N_state | the states of the samples
actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
Returns: torch.Tensor: N_sample | log probability of each action under the policy's Gaussian distribution, summed over action dimensions
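The quantity computed here is the diagonal-Gaussian log-density summed over action dimensions. As a sanity check (hypothetical shapes again), it should agree with torch.distributions.Normal:

import torch
from torch.distributions import Normal

device = torch.device('cpu')
policy = Policy(num_inputs=10, num_outputs=4, hidden_size=64, device=device)

states = torch.randn(32, 10)
actions = torch.randn(32, 4)

log_p = policy.getLogProbabilityDensity(states, actions)

mean, log_std, _ = policy(states)
reference = Normal(mean, log_std.exp()).log_prob(actions).sum(dim=1)
print(torch.allclose(log_p, reference))  # True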
def get_action(self, state)
-
Parameters: state (numpy.array): N_state
Returns: numpy.array: 1 x N_action | sampled action, softmax-normalised so the components sum to one
def get_mean_action(self, state)
-
Parameters: state (numpy.array): N_state
Returns: numpy.array: 1 x N_action | mean (deterministic) action
def meanKlDivergence(self, states, actions, logProbablityOld)
-
Parameters:
states (torch.Tensor): N_sample x N_state | the states of the samples
actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
logProbablityOld (torch.Tensor): N_sample | log probability of the actions under the old policy; this should be detached from the gradient.
Returns: torch.Tensor: scalar | the mean KL-divergence over the batch
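A sketch of how this is typically used in a TRPO-style update (hypothetical batch and sizes): the old log-probabilities are computed once and detached, and the mean KL is then monitored as the trust-region constraint while the parameters change.

import torch

device = torch.device('cpu')
policy = Policy(num_inputs=10, num_outputs=4, hidden_size=64, device=device)

states = torch.randn(32, 10)
actions = torch.randn(32, 4)

# Log-probabilities under the current ("old") policy, detached as the docstring requires
log_p_old = policy.getLogProbabilityDensity(states, actions).detach()

kl = policy.meanKlDivergence(states, actions, log_p_old)
print(kl.item())  # 0.0 before any parameter update; grows as the policy moves away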