Module AssetAllocator.algorithms.TD3.actor
Expand source code
import torch.nn as nn
import torch.optim as optim
class Actor(nn.Module):
"""This is the actor network for the TD3 Agent.
Original paper can be found at https://arxiv.org/abs/1802.09477
This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3
"""
def __init__(self, state_dim, action_dim, hidden_dim, lookback_dim, add_lstm = True, num_layers = 3,
lr = 0.1, max_action = 1, dropout = 0.2):
"""Initialize the TD3 Actor Network
Args:
state_dim (int): State space dimension
action_dim (int): Action space dimension
hidden_dim (int): Number of neurons in each hidden layer
lookback_dim (int): Environment lookback dimension
add_lstm (bool, optional): Whether to add an LSTM layer. Defaults to True.
num_layers (int, optional): Number of LSTM layers. Defaults to 3.
lr (float, optional): Learning rate. Defaults to 0.1.
max_action (int, optional): Action scaling value. Defaults to 1.
dropout (float, optional): Dropout probability. Defaults to 0.2.
"""
super(Actor, self).__init__()
self.state_dim = state_dim
self.action_dim = action_dim
self.max_action = max_action
if add_lstm:
self.lstm = nn.LSTM(action_dim - 1, state_dim//2, num_layers = num_layers, batch_first = True,
dropout = dropout, bidirectional = True)
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_dim * lookback_dim, hidden_dim),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, action_dim),
)
else:
self.lstm = None
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, action_dim),
)
# Optimize all model parameters, including the LSTM when one is present
self.optimizer = optim.Adam(self.parameters(), lr = lr)
self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', patience = 2)
def forward(self, state):
"""Forward pass
Args:
state (array_like): Current environment state
Returns:
action: Agent's Action Values
"""
if self.lstm:
state = state.reshape(state.shape[0], -1, self.action_dim - 1)
out, _ = self.lstm(state)
out = self.linear_relu_stack(out.reshape(state.shape[0],-1))
else:
out = self.linear_relu_stack(state)
action = nn.functional.softmax(out, dim = 1)
return action * self.max_action
Classes
class Actor (state_dim, action_dim, hidden_dim, lookback_dim, add_lstm=True, num_layers=3, lr=0.1, max_action=1, dropout=0.2)
-
This is the actor network for the TD3 Agent.
Original paper can be found at https://arxiv.org/abs/1802.09477
This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3
Initialize the TD3 Actor Network
Args
state_dim : int
- State space dimension
action_dim : int
- Action space dimension
hidden_dim : int
- Number of neurons in each hidden layer
lookback_dim : int
- Environment lookback dimension
add_lstm : bool, optional
- Whether to add an LSTM layer. Defaults to True.
num_layers : int, optional
- Number of LSTM layers. Defaults to 3.
lr : float, optional
- Learning rate. Defaults to 0.1.
max_action : int, optional
- Action scaling value. Defaults to 1.
dropout : float, optional
- Dropout probability. Defaults to 0.2.
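A minimal construction sketch, assuming the flattened state length equals lookback_dim * (action_dim - 1), which is the shape the LSTM reshape in forward expects; all sizes below are illustrative rather than values prescribed by the library:
from AssetAllocator.algorithms.TD3.actor import Actor
# Illustrative sizes: 8 output actions, so 7 per-step features,
# and a lookback window of 10, giving state_dim = 10 * 7 = 70.
actor = Actor(state_dim=70, action_dim=8, hidden_dim=64,
              lookback_dim=10, add_lstm=True, num_layers=3, lr=1e-3)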
Ancestors
- torch.nn.modules.module.Module
Class variables
var dump_patches : bool
var training : bool
Methods
def forward(self, state)
-
Forward pass
Args
state : array_like
- Current environment state
Returns
action
- Agent's Action Values
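A short forward-pass sketch under the same illustrative sizes (the batch size and tensor values are hypothetical); because the output is a softmax scaled by max_action, each row of the returned actions sums to max_action:
import torch
from AssetAllocator.algorithms.TD3.actor import Actor
actor = Actor(state_dim=70, action_dim=8, hidden_dim=64, lookback_dim=10)
state = torch.randn(4, 70)   # (batch, lookback_dim * (action_dim - 1))
actions = actor(state)       # shape (batch, action_dim), i.e. torch.Size([4, 8])
print(actions.sum(dim=1))    # each row sums to max_action (1.0 by default)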