Module AssetAllocator.algorithms.TD3.memory

import torch
import numpy as np
import random

class Memory:
    """This is the replay buffer class for the TD3 Agent.

    Original paper can be found at https://arxiv.org/abs/1802.09477

    This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3
    
    """
    def __init__(self, capacity):
        """Initialize a ReplayBuffer object.

        Args:
            capacity (int): maximum number of transitions the buffer can hold
        """
        self.capacity = capacity
        self.buffer = [None] * capacity
        self.idx = 0

    def store(self, experience):
        """Add a new experience to memory.

        Args:
            experience (tuple): transition in the order (state, action,
                next_state, reward, done); the column order must match the
                unpacking in sample
        """
        index = self.idx % self.capacity  # wrap around and overwrite the oldest entry once full
        self.buffer[index] = experience
        self.idx += 1

    def sample(self, batch_size, device):
        """
        Randomly sample a batch of experiences from memory.

        Args:
            batch_size (int): Batch size to sample
            device (torch.device or str): device to place the sampled
                tensors on, e.g. 'cuda' or 'cpu'
        """
        # sample without replacement from the filled portion of the buffer
        experience = np.array(random.sample(self.buffer[:len(self)], batch_size), dtype=object)

        # each row of experience is (state, action, next_state, reward, done)
        states = torch.tensor(np.array(experience[:, 0].tolist()), dtype=torch.float32).to(device)
        actions = torch.tensor(np.array(experience[:, 1].tolist()), dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(experience[:, 2].tolist()), dtype=torch.float32).to(device)
        rewards = torch.tensor(np.array(experience[:, 3].tolist()), dtype=torch.float32).to(device)
        dones = torch.tensor(np.array(experience[:, 4].tolist())).to(device)

        return states, actions, next_states, rewards, dones
    
    def __len__(self):
        """
        Return the current size of internal memory.
        """
        return min(self.idx, self.capacity)
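
A minimal round trip with this buffer might look like the following sketch.
The state and action shapes (4-dimensional and 2-dimensional) are
illustrative assumptions, not part of the module:

import numpy as np
import torch
from AssetAllocator.algorithms.TD3.memory import Memory

memory = Memory(capacity=10_000)

for _ in range(256):
    state = np.random.randn(4).astype(np.float32)   # hypothetical 4-dim state
    action = np.random.randn(2).astype(np.float32)  # hypothetical 2-dim action
    next_state = np.random.randn(4).astype(np.float32)
    reward = float(np.random.randn())
    done = False
    # the tuple order must match the column unpacking in Memory.sample
    memory.store((state, action, next_state, reward, done))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
states, actions, next_states, rewards, dones = memory.sample(64, device)
print(states.shape)   # torch.Size([64, 4])
print(actions.shape)  # torch.Size([64, 2])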

Classes

class Memory (capacity)

This is the replay buffer class for the TD3 Agent.

Original paper can be found at https://arxiv.org/abs/1802.09477

This implementation was adapted from https://github.com/saashanair/rl-series/tree/master/td3

Initialize the replay buffer.

Args

capacity : int
maximum number of transitions the buffer can hold
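
The buffer is circular: once capacity transitions have been stored, new
entries overwrite the oldest ones and len(memory) saturates at capacity. A
small illustration, with placeholder integers standing in for transitions:

from AssetAllocator.algorithms.TD3.memory import Memory

memory = Memory(capacity=3)
for i in range(5):
    memory.store((i, i, i, i, False))  # placeholder transition

print(len(memory))    # 3 -- saturated at capacity
print(memory.buffer)  # [(3, 3, 3, 3, False), (4, 4, 4, 4, False), (2, 2, 2, 2, False)]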

Methods

def sample(self, batch_size, device)

Randomly sample a batch of experiences from memory.

Args

batch_size : int
Batch size to sample
device : torch.device or str
Device to place the sampled tensors on, e.g. 'cuda' or 'cpu'
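
sample draws without replacement via random.sample, so it assumes at least
batch_size transitions are already in the buffer; otherwise random.sample
raises ValueError. A guarded call might look like this sketch (the shapes
are placeholder assumptions):

import numpy as np
import torch
from AssetAllocator.algorithms.TD3.memory import Memory

memory = Memory(capacity=100)
for _ in range(10):
    s = np.zeros(3, dtype=np.float32)    # placeholder 3-dim state
    a = np.zeros(1, dtype=np.float32)    # placeholder 1-dim action
    memory.store((s, a, s, 0.0, False))

BATCH_SIZE = 4  # hypothetical batch size
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if len(memory) >= BATCH_SIZE:
    states, actions, next_states, rewards, dones = memory.sample(BATCH_SIZE, device)
    print(states.shape, rewards.shape)  # torch.Size([4, 3]) torch.Size([4])
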
def store(self, experience)

Add a new experience to memory.

Args

experience : tuple
Transition in the order (state, action, next_state, reward, done); the column order must match the unpacking in sample
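
The tuple order matters: sample unpacks column 2 as the next state and
column 3 as the reward, so transitions must be stored as (state, action,
next_state, reward, done). A minimal sketch with placeholder values:

import numpy as np
from AssetAllocator.algorithms.TD3.memory import Memory

memory = Memory(capacity=1000)
state = np.array([0.0, 0.0], dtype=np.float32)        # placeholder state
action = np.array([0.5], dtype=np.float32)            # placeholder action
next_state = np.array([0.1, -0.1], dtype=np.float32)  # placeholder next state
memory.store((state, action, next_state, 1.0, False))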