Module AssetAllocator.algorithms.TRPO.policy
Source code
import torch
import torch.nn as nn
import numpy as np

# All tensors and module parameters default to float64.
torch.set_default_tensor_type('torch.DoubleTensor')
class Policy(nn.Module):
    def __init__(self, num_inputs, num_outputs, hidden_size, device):
        super(Policy, self).__init__()
        self.inputLayer = nn.Linear(num_inputs, hidden_size)
        self.hiddenLayer = nn.Linear(hidden_size, hidden_size)
        self.hiddenLayer2 = nn.Linear(hidden_size, hidden_size)
        self.outputLayer = nn.Linear(hidden_size, num_outputs)
        # State-independent log standard deviation, one entry per action dimension.
        self.logStd = nn.Parameter(torch.zeros(1, num_outputs))
        self.device = device
    def forward(self, x):
        """
        Parameters:
        x (torch.Tensor): N_sample x N_state | batch of states
        Returns:
        torch.Tensor: N_sample x N_action | mean of the action
        torch.Tensor: N_sample x N_action | log(std) of the action
        torch.Tensor: 1 x N_action | std of the action (broadcasts against the mean)
        """
        x = x.double().to(self.device)
        x = torch.tanh(self.inputLayer(x))
        x = torch.tanh(self.hiddenLayer(x))
        x = torch.tanh(self.hiddenLayer2(x))
        action_mean = self.outputLayer(x)
        action_logStd = self.logStd.expand_as(action_mean)
        action_std = torch.exp(self.logStd)
        return action_mean, action_logStd, action_std
    def getLogProbabilityDensity(self, states, actions):
        """
        Parameters:
        states (torch.Tensor): N_sample x N_state | the states of the samples
        actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
        Returns:
        torch.Tensor: N_sample | log probability of each action under the diagonal Gaussian policy
        """
        action_mean, logStd, action_std = self.forward(states.to(self.device))
        var = torch.exp(logStd).pow(2)
        # Per-dimension Gaussian log density, summed over the action dimensions.
        logProbabilitiesDensity = -(actions.reshape(action_mean.shape) - action_mean).pow(2) / (
            2 * var) - 0.5 * np.log(2 * np.pi) - logStd
        return logProbabilitiesDensity.sum(1)
    def meanKlDivergence(self, states, actions, logProbablityOld):
        """
        Parameters:
        states (torch.Tensor): N_sample x N_state | the states of the samples
        actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
        logProbablityOld (torch.Tensor): N_sample | log probability of the actions under the old
            policy; note that this should be detached from the computation graph.
        Returns:
        torch.Tensor: scalar | sample-based estimate of the mean KL divergence
        """
        logProbabilityNew = self.getLogProbabilityDensity(states.to(self.device), actions.to(self.device))
        return (torch.exp(logProbablityOld)
                * (logProbablityOld - logProbabilityNew)).mean()
    def get_action(self, state):
        """
        Parameters:
        state (numpy.ndarray): N_state | a single observation
        Returns:
        numpy.ndarray: 1 x N_action | sampled action, softmax-normalized to sum to one
        """
        state = torch.from_numpy(state).unsqueeze(0).to(self.device)
        action_mean, action_log_std, action_std = self.forward(state)
        # Sample from the Gaussian, then squash with a softmax so the output is a valid allocation.
        action = torch.normal(action_mean, action_std)
        action = torch.nn.Softmax(dim=1)(action)
        return action.cpu().detach().numpy()
    def get_mean_action(self, state):
        """
        Parameters:
        state (numpy.ndarray): N_state | a single observation
        Returns:
        numpy.ndarray: 1 x N_action | mean action (no sampling, no softmax applied)
        """
        state = torch.from_numpy(state).unsqueeze(0).to(self.device)
        action_mean, action_log_std, action_std = self.forward(state)
        return action_mean.cpu().detach().numpy()
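A minimal usage sketch. The state size, action size, hidden width, and CPU device below are illustrative assumptions, not values taken from this module:

import numpy as np
import torch
from AssetAllocator.algorithms.TRPO.policy import Policy

device = torch.device('cpu')
policy = Policy(num_inputs=10, num_outputs=5, hidden_size=64, device=device)

state = np.random.randn(10)                    # one observation with 10 features
weights = policy.get_action(state)             # shape (1, 5): softmax-normalized Gaussian sample
mean_weights = policy.get_mean_action(state)   # shape (1, 5): deterministic mean action
print(weights.sum())                           # ~1.0, i.e. a valid allocation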
Classes

- class Policy (num_inputs, num_outputs, hidden_size, device)

  A diagonal-Gaussian policy for TRPO: a tanh MLP maps states to action means, and a learned,
  state-independent log standard deviation parameterizes the action noise. See the source
  listing above for the full implementation.
  Ancestors
  - torch.nn.modules.module.Module
  Class variables
  - var dump_patches : bool
  - var training : bool
  Methods

  - def forward(self, x)

    Parameters:
      x (torch.Tensor): N_sample x N_state | batch of states
    Returns:
      torch.Tensor: N_sample x N_action | mean of the action
      torch.Tensor: N_sample x N_action | log(std) of the action
      torch.Tensor: 1 x N_action | std of the action (broadcasts against the mean)
  - def getLogProbabilityDensity(self, states, actions)

    Parameters:
      states (torch.Tensor): N_sample x N_state | the states of the samples
      actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
    Returns:
      torch.Tensor: N_sample | log probability of each action under the diagonal Gaussian policy
        (a usage sketch appears at the end of this page)
  - def get_action(self, state)

    Parameters:
      state (numpy.ndarray): N_state | a single observation
    Returns:
      numpy.ndarray: 1 x N_action | sampled action, softmax-normalized so the weights sum to one
  - def get_mean_action(self, state)

    Parameters:
      state (numpy.ndarray): N_state | a single observation
    Returns:
      numpy.ndarray: 1 x N_action | mean action (no sampling, no softmax applied)
  - def meanKlDivergence(self, states, actions, logProbablityOld)

    Parameters:
      states (torch.Tensor): N_sample x N_state | the states of the samples
      actions (torch.Tensor): N_sample x N_action | the actions taken for these samples
      logProbablityOld (torch.Tensor): N_sample | log probability of the actions under the old
        policy; this should be detached from the computation graph.
    Returns:
      torch.Tensor: scalar | sample-based estimate of the mean KL divergence between the old and
        current policies (a usage sketch follows below)
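A sketch tying the two log-probability methods together. The sizes, the trust-region threshold, and the training-loop placement are illustrative assumptions, not values taken from this module:

import torch
from AssetAllocator.algorithms.TRPO.policy import Policy

policy = Policy(num_inputs=10, num_outputs=5, hidden_size=64, device=torch.device('cpu'))
states = torch.randn(8, 10)     # 8 samples, 10 state features (illustrative)
actions = torch.randn(8, 5)     # 8 samples, 5 action dimensions (illustrative)

# getLogProbabilityDensity agrees with torch.distributions for the same diagonal Gaussian.
log_p_old = policy.getLogProbabilityDensity(states, actions).detach()
mean, log_std, _ = policy(states)
reference = torch.distributions.Normal(mean, log_std.exp()).log_prob(actions).sum(dim=1)
assert torch.allclose(log_p_old, reference.detach())

# ... apply a candidate parameter update to `policy` here ...

# meanKlDivergence estimates how far the updated policy has drifted; a TRPO-style
# line search would compare it against a trust-region radius.
kl = policy.meanKlDivergence(states, actions, log_p_old)
max_kl = 1e-2                   # illustrative threshold
if kl.item() > max_kl:
    pass                        # shrink or reject the candidate step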