Module AssetAllocator.algorithms.A2C.agent

Expand source code
from .a2c import Actor, Critic, A2CLearner, Runner
import torch

class A2CAgent: 
    
    """This is the agent class for the A2C Agent.

    Original paper can be found at https://arxiv.org/abs/1802.09477

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3
    
    """
    def __init__(self, env, hidden_dim = 256, gamma=0.9, entropy_beta=0,
                 actor_lr=4e-4, critic_lr=4e-3, max_grad_norm=0.5):
        
        """Initializes the A2C Agent

        Args:
            env ([type]): Gym environment for the agent to interact with
            hidden_dim (int, optional): Size of hidden layer neurons. Defaults to 256.
            device (str, optional): One of cuda or cpu. Defaults to 'cuda'.
            memory_dim ([type], optional): Size of replay buffer. Defaults to 100_000.
            actor_lr ([type], optional): Actor's learning rate. Defaults to 1e-3.
            critic_lr ([type], optional): Critic's learning rate. Defaults to 1e-3
        """  
        
        self.env = env
           
        n_actions = self.env.action_space.shape[0]
        state_dim = self.env.observation_space.shape[0]
        
        actor = Actor(state_dim, hidden_dim, n_actions)
        critic = Critic(state_dim, hidden_dim)

        # Keep direct references to the networks so save() and load() can reach them
        self.actor = actor
        self.critic = critic

        self.learner = A2CLearner(actor, critic, gamma, entropy_beta,
                                  actor_lr, critic_lr, max_grad_norm)
        self.runner = Runner(env, actor)
        
    def learn(self, timesteps, print_every = 1000):
        """
        Trains the agent
        Params
        ======
            timesteps (int): Number of timesteps the agent should interact with the environment
            print_every (int): Verbosity control
        """
        total_steps = timesteps//self.env.episode_length + 1

        while self.runner.steps <= timesteps:
            memory = self.runner.run(total_steps, print_every)
            self.learner.learn(memory, self.runner.steps, discount_rewards=True)
            
    def predict(self, state):

        """Returns agent's action based on a given state
        Args:
            state (array_like): Current environment state
        Returns:
            action (array_like): Agent's action
        """         
        return self.learner.predict(state)
    
    def save(self, file_name):
        """
        Saves trained model
        Params
        =====
            file_name (str): path prefix used to name the saved model files
        """
        torch.save(self.actor.state_dict(), file_name + '_actor')
        torch.save(self.learner.actor_optim.state_dict(), file_name + '_actor_optimizer')
        
        torch.save(self.critic.state_dict(), file_name + '_critic')
        torch.save(self.learner.critic_optim.state_dict(), file_name + '_critic_optimizer')
        
    def load(self, file_name):
        """
        Loads trained model
        Params
        =====
            file_name (str): path prefix of the saved model files to load
        """
        self.actor.load_state_dict(torch.load(file_name + '_actor'))
        self.learner.actor_optim.load_state_dict(torch.load(file_name + '_actor_optimizer'))
        
        self.critic.load_state_dict(torch.load(file_name + '_critic'))
        self.learner.critic_optim.load_state_dict(torch.load(file_name + '_critic_optimizer'))
        

Classes

class A2CAgent (env, hidden_dim=256, gamma=0.9, entropy_beta=0, actor_lr=0.0004, critic_lr=0.004, max_grad_norm=0.5)

Agent class for the A2C (Advantage Actor-Critic) algorithm.

The algorithm is described in https://arxiv.org/abs/1602.01783 (A2C is the synchronous variant of A3C).

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3

Initializes the A2C Agent

Args

env : gym.Env
Gym environment for the agent to interact with
hidden_dim : int, optional
Number of neurons in each hidden layer. Defaults to 256.
gamma : float, optional
Discount factor. Defaults to 0.9.
entropy_beta : float, optional
Coefficient of the entropy regularization term. Defaults to 0.
actor_lr : float, optional
Actor's learning rate. Defaults to 4e-4.
critic_lr : float, optional
Critic's learning rate. Defaults to 4e-3.
max_grad_norm : float, optional
Maximum gradient norm used for gradient clipping. Defaults to 0.5.
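
A minimal usage sketch, assuming a Gym environment with continuous (Box) observation and action spaces. The environment name and the episode_length attribute set below are illustrative stand-ins, not part of this module:

import gym
from AssetAllocator.algorithms.A2C.agent import A2CAgent

# Illustrative environment: any continuous-control Gym env with Box spaces will do.
env = gym.make('Pendulum-v1')
env.episode_length = 200  # learn() reads env.episode_length; set it if the env does not define one

agent = A2CAgent(env, hidden_dim=256, gamma=0.9, entropy_beta=0.0,
                 actor_lr=4e-4, critic_lr=4e-3, max_grad_norm=0.5)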

Methods

def learn(self, timesteps, print_every=1000)

Trains the agent.

Params

timesteps (int): Number of timesteps the agent should interact with the environment
print_every (int): Verbosity control
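
Continuing the illustrative setup from the class example above, a training call might look like:

agent.learn(timesteps=50_000, print_every=1000)  # run until roughly 50k environment steps have been collected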

def load(self, file_name)

Loads a trained model.

Params

file_name (str): path prefix of the saved model files to load
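
Assuming the four files were previously written by save() under the same prefix (the path below is illustrative), restoring a trained agent looks like:

agent.load('checkpoints/a2c_portfolio')
# expects checkpoints/a2c_portfolio_actor, _actor_optimizer, _critic and _critic_optimizer to exist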

def predict(self, state)

Returns agent's action based on a given state

Args

state : array_like
Current environment state

Returns

action (array_like): Agent's action
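
A short rollout sketch, assuming the environment follows the classic Gym reset()/step() API (newer Gym versions return additional values from both calls):

state = env.reset()
done = False
while not done:
    action = agent.predict(state)                 # action has the shape of env.action_space
    state, reward, done, info = env.step(action)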

def save(self, file_name)

Saves the trained model.

Params

file_name (str): path prefix used to name the saved model files
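
For example, with an illustrative prefix (the containing directory must already exist):

agent.save('checkpoints/a2c_portfolio')
# writes checkpoints/a2c_portfolio_actor, _actor_optimizer, _critic and _critic_optimizer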
