Module AssetAllocator.algorithms.A2C.agent
from .a2c import Actor, Critic, A2CLearner, Runner
import torch
class A2CAgent:
    """Agent class for the Advantage Actor-Critic (A2C) algorithm.
    A2C is a synchronous variant of the algorithm introduced in https://arxiv.org/abs/1602.01783
    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3

    """
    def __init__(self, env, hidden_dim=256, gamma=0.9, entropy_beta=0,
                 actor_lr=4e-4, critic_lr=4e-3, max_grad_norm=0.5):
        
        """Initializes the A2C Agent

        Args:
            env (gym.Env): Gym environment for the agent to interact with
            hidden_dim (int, optional): Size of hidden layer neurons. Defaults to 256.
            gamma (float, optional): Discount factor. Defaults to 0.9.
            entropy_beta (float, optional): Weight of the entropy bonus in the actor loss. Defaults to 0.
            actor_lr (float, optional): Actor's learning rate. Defaults to 4e-4.
            critic_lr (float, optional): Critic's learning rate. Defaults to 4e-3.
            max_grad_norm (float, optional): Maximum gradient norm used for gradient clipping. Defaults to 0.5.
        """
        
        self.env = env
           
        n_actions = self.env.action_space.shape[0]
        state_dim = self.env.observation_space.shape[0]
        
        # Keep references to the networks so that save/load can reach them
        self.actor = Actor(state_dim, hidden_dim, n_actions)
        self.critic = Critic(state_dim, hidden_dim)
        self.learner = A2CLearner(self.actor, self.critic, gamma, entropy_beta,
                                  actor_lr, critic_lr, max_grad_norm)
        self.runner = Runner(env, self.actor)
        
    def learn(self, timesteps, print_every=1000):
        """
        Trains the agent

        Params
        ======
            timesteps (int): Number of timesteps the agent should interact with the environment
            print_every (int): Print training progress every `print_every` steps
        """
        # Number of episodes needed to cover the requested number of timesteps
        total_episodes = timesteps // self.env.episode_length + 1
        while self.runner.steps <= timesteps:
            memory = self.runner.run(total_episodes, print_every)
            self.learner.learn(memory, self.runner.steps, discount_rewards=True)
            
    def predict(self, state):
        """Returns agent's action based on a given state
        Args:
            state (array_like): Current environment state
        Returns:
            action (array_like): Agent's action
        """         
        return self.learner.predict(state)
    
    def save(self, file_name):
        """
        Saves trained model

        Params
        ======
            file_name (str): Path prefix under which the model and optimizer weights are saved
        """
        torch.save(self.actor.state_dict(), file_name + '_actor')
        torch.save(self.learner.actor_optim.state_dict(), file_name + '_actor_optimizer')

        torch.save(self.critic.state_dict(), file_name + '_critic')
        torch.save(self.learner.critic_optim.state_dict(), file_name + '_critic_optimizer')
        
    def load(self, file_name):
        """
        Loads trained model

        Params
        ======
            file_name (str): Path prefix from which the model and optimizer weights are loaded
        """
        self.actor.load_state_dict(torch.load(file_name + '_actor'))
        self.learner.actor_optim.load_state_dict(torch.load(file_name + '_actor_optimizer'))

        self.critic.load_state_dict(torch.load(file_name + '_critic'))
        self.learner.critic_optim.load_state_dict(torch.load(file_name + '_critic_optimizer'))
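
A minimal usage sketch (not part of the module source): it assumes a Gym-style environment with Box observation and action spaces, plus an episode_length attribute, which learn() relies on. The Pendulum-v1 environment, the episode_length value, and the checkpoint prefix 'checkpoints/a2c' are stand-ins for illustration only.

import gym
from AssetAllocator.algorithms.A2C.agent import A2CAgent

# Stand-in continuous-control environment; in practice this would be
# an AssetAllocator portfolio environment.
env = gym.make('Pendulum-v1')
env.episode_length = 200  # hypothetical value; learn() divides timesteps by this attribute

agent = A2CAgent(env, hidden_dim=256, gamma=0.9)
agent.learn(timesteps=50_000, print_every=1000)  # interact for ~50k environment steps
agent.save('checkpoints/a2c')                    # writes *_actor, *_critic, and optimizer files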
Classes

- class A2CAgent (env, hidden_dim=256, gamma=0.9, entropy_beta=0, actor_lr=0.0004, critic_lr=0.004, max_grad_norm=0.5)

  Agent class for the Advantage Actor-Critic (A2C) algorithm. A2C is a synchronous variant of the algorithm introduced in https://arxiv.org/abs/1602.01783; this implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3. Constructor arguments are documented in the source above.

  Methods

  - def learn(self, timesteps, print_every=1000)

    Trains the agent for the given number of environment timesteps, printing progress every print_every steps.

  - def load(self, file_name)

    Loads the actor, critic, and optimizer state dicts from files under the given path prefix.

  - def predict(self, state)

    Returns the agent's action for a given environment state.

  - def save(self, file_name)

    Saves the actor, critic, and optimizer state dicts to files under the given path prefix.
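
A hedged evaluation sketch to go with the index above, assuming the classic 4-tuple Gym step API and a checkpoint saved under the hypothetical prefix 'checkpoints/a2c':

import gym
from AssetAllocator.algorithms.A2C.agent import A2CAgent

env = gym.make('Pendulum-v1')  # stand-in environment, as in the training sketch
env.episode_length = 200

agent = A2CAgent(env)
agent.load('checkpoints/a2c')  # restores actor/critic weights and optimizer states

state = env.reset()
total_reward, done = 0.0, False
while not done:
    action = agent.predict(state)                 # agent's action for the current state
    state, reward, done, info = env.step(action)  # classic Gym 4-tuple step API
    total_reward += reward
print('episode reward:', total_reward)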