Module AssetAllocator.algorithms.DDPG.Replay_Memory

Experience replay buffer used to stabilize off-policy training

"""
Script that contains the details about the experience replay buffer used to ensure training stability
"""

## initial thought was to use deque, but with a large replay memory random sampling turns out to be very inefficient, since indexing into the middle of a deque is O(n) -- see https://stackoverflow.com/questions/40181284/how-to-get-random-sample-from-deque-in-python-3

import random
import numpy as np

import torch

class ReplayMemory:
    """
    Class representing the replay buffer used for storing experiences for off-policy learning
    """

    def __init__(self, capacity):
        """Initialize a ReplayBuffer object.

        Args:
            capacity (int): maximum size of buffer
        """
        self.capacity = capacity
        self.buffer = [] # each stored experience is a 5-element transition of the form (state, action, next_state, reward, done)
        self.idx = 0

    def store(self, transition):
        """Add a new experience to memory.

        Args:
            transition (array_like): transition of the form (state, action, next_state, reward, done)
        """
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.idx] = transition
        self.idx = (self.idx + 1) % self.capacity # for circular memory


    def sample(self, batchsize, device):
        """
        Randomly sample a batch of experiences from memory.

        Args:
            batch_size (int): Batch size to sample
            device: One of cuda or cpus
        """
        # gather a random batch as an object array so that each column holds one field of the transitions
        transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
        # stack each field across the batch and move the resulting tensors to the target device
        states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
        actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
        rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
        dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

        return states, actions, next_states, rewards, dones


    def __len__(self):
        """
        Return the current size of internal memory.
        """
        return len(self.buffer)
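
The comment near the top of the module points at the design choice of a plain Python list with a wrap-around index instead of collections.deque. A minimal timing sketch (not part of the module) that illustrates why: random.sample indexes into the container, list indexing is O(1), while indexing into the middle of a deque is O(n).

import random
import timeit
from collections import deque

n = 1_000_000
as_list = list(range(n))
as_deque = deque(as_list)

# sampling 64 items 100 times; the deque version is dramatically slower
# because each index lookup walks the deque from one of its ends
print(timeit.timeit(lambda: random.sample(as_list, 64), number=100))
print(timeit.timeit(lambda: random.sample(as_deque, 64), number=100))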

Classes

class ReplayMemory (capacity)

Class representing the replay buffer used for storing experiences for off-policy learning

Initialize a ReplayBuffer object.

Args

capacity : int
maximum size of buffer
class ReplayMemory:
    """
    Class representing the replay buffer used for storing experiences for off-policy learning
    """

    def __init__(self, capacity):
        """Initialize a ReplayBuffer object.

        Args:
            capacity (int): maximum size of buffer
        """
        self.capacity = capacity
        self.buffer = [] # each stored experience is a 5-element transition of the form (state, action, next_state, reward, done)
        self.idx = 0

    def store(self, transition):
        """Add a new experience to memory.

        Args:
            transition (array_like): transition of the form (state, action, next_state, reward, done)
        """
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.idx] = transition
        self.idx = (self.idx + 1) % self.capacity # for circular memory


    def sample(self, batchsize, device):
        """
        Randomly sample a batch of experiences from memory.

        Args:
            batch_size (int): Batch size to sample
            device: One of cuda or cpus
        """
        # gather a random batch as an object array so that each column holds one field of the transitions
        transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
        # stack each field across the batch and move the resulting tensors to the target device
        states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
        actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
        rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
        dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

        return states, actions, next_states, rewards, dones


    def __len__(self):
        """
        Return the current size of internal memory.
        """
        return len(self.buffer)
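
A minimal usage sketch of the class above. The environment-free loop, the state and action dimensions, and the batch size below are hypothetical and only illustrate the expected shapes.

import numpy as np
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
memory = ReplayMemory(capacity=100_000)

state_dim, action_dim = 8, 3  # hypothetical dimensions
state = np.zeros(state_dim, dtype=np.float32)
for _ in range(256):
    # fabricate a transition of the form (state, action, next_state, reward, done)
    action = np.random.uniform(-1, 1, size=action_dim).astype(np.float32)
    next_state = np.random.randn(state_dim).astype(np.float32)
    reward = float(np.random.randn())
    memory.store((state, action, next_state, reward, False))
    state = next_state

# sample a training batch once enough transitions have been collected
if len(memory) >= 64:
    states, actions, next_states, rewards, dones = memory.sample(64, device)
    print(states.shape, actions.shape, rewards.shape)  # torch.Size([64, 8]) torch.Size([64, 3]) torch.Size([64])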

Methods

def sample(self, batchsize, device)

Randomly sample a batch of experiences from memory.

Args

batchsize : int
Number of transitions to sample
device
Torch device to place the sampled tensors on ('cuda' or 'cpu')

Returns

Tuple of tensors (states, actions, next_states, rewards, dones)
def sample(self, batchsize, device):
    """
    Randomly sample a batch of experiences from memory.

    Args:
        batch_size (int): Batch size to sample
        device: One of cuda or cpus
    """
    # gather a random batch as an object array so that each column holds one field of the transitions
    transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
    # stack each field across the batch and move the resulting tensors to the target device
    states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
    actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
    next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
    rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
    dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

    return states, actions, next_states, rewards, dones
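
Note that the returned dones tensor keeps the dtype of the stored flags (typically bool), so it usually needs an explicit cast before entering a Bellman target. A small sketch with fabricated tensors (gamma and the Q-values are hypothetical, not produced by this module):

import torch

rewards = torch.tensor([1.0, 0.5, -0.2])
next_q = torch.tensor([2.0, 1.5, 3.0])      # stand-in for the target critic's Q-values
dones = torch.tensor([False, True, False])
gamma = 0.99                                # hypothetical discount factor

# zero out bootstrapping where the episode ended; bool flags are cast to float first
targets = rewards + gamma * (1.0 - dones.float()) * next_q
print(targets)  # tensor([2.9800, 0.5000, 2.7700])
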
def store(self, transition)

Add a new experience to memory.

Args

transition : array_like
Transition of the form (state, action, next_state, reward, done)
def store(self, transition):
    """Add a new experience to memory.

    Args:
        transition (array_like): transition of the form (state, action, next_state, reward, done)
    """
    if len(self.buffer) < self.capacity:
        self.buffer.append(transition)
    else:
        self.buffer[self.idx] = transition
    self.idx = (self.idx + 1) % self.capacity # for circular memory
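
A small sketch (with placeholder values) showing the wrap-around behaviour of store once the buffer is full:

memory = ReplayMemory(capacity=3)
for i in range(5):
    memory.store((f's{i}', f'a{i}', f's{i+1}', float(i), False))

print(len(memory))    # 3 -- the buffer never grows past its capacity
print(memory.idx)     # 2 -- the next write position has wrapped around
print(memory.buffer)  # transitions 3 and 4 have overwritten the two oldest entries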