Module AssetAllocator.algorithms.DDPG.Replay_Memory

Experience replay buffer used to stabilize off-policy training

"""
Script that contains the details about the experience replay buffer used to ensure training stability
"""

## initial thought was to use deque, but with a large replay memory random sampling turns out to be very inefficient, since indexing into the middle of a deque is O(n) -- see https://stackoverflow.com/questions/40181284/how-to-get-random-sample-from-deque-in-python-3

import random
import numpy as np

import torch

class ReplayMemory:
    """
    Class representing the replay buffer used for storing experiences for off-policy learning
    """

    def __init__(self, capacity):
        """Initialize a ReplayBuffer object.

        Args:
            capacity (int): maximum size of buffer
        """
        self.capacity = capacity
        self.buffer = [] # each stored experience is a 5-element transition of the form (state, action, next_state, reward, done)
        self.idx = 0

    def store(self, transition):
        """Add a new experience to memory.

        Args:
            transition (array_like): transition of the form (state, action, next_state, reward, done)
        """
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.idx] = transition
        self.idx = (self.idx + 1) % self.capacity # for circular memory


    def sample(self, batchsize, device):
        """
        Randomly sample a batch of experiences from memory.

        Args:
            batch_size (int): Batch size to sample
            device: One of cuda or cpus
        """
        # gather a random batch as an object array so that each column holds one field of the transitions
        transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
        # stack each field across the batch and move the resulting tensors to the target device
        states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
        actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
        rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
        dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

        return states, actions, next_states, rewards, dones


    def __len__(self):
        """
        Return the current size of internal memory.
        """
        return len(self.buffer)
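
The comment near the top of the module points at the design choice of a plain Python list with a wrap-around index instead of collections.deque. A minimal timing sketch (not part of the module) that illustrates why: random.sample indexes into the container, list indexing is O(1), while indexing into the middle of a deque is O(n).

import random
import timeit
from collections import deque

n = 1_000_000
as_list = list(range(n))
as_deque = deque(as_list)

# sampling 64 items 100 times; the deque version is dramatically slower
# because each index lookup walks the deque from one of its ends
print(timeit.timeit(lambda: random.sample(as_list, 64), number=100))
print(timeit.timeit(lambda: random.sample(as_deque, 64), number=100))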

Classes

class ReplayMemory (capacity)

Class representing the replay buffer used for storing experiences for off-policy learning

Initialize a ReplayBuffer object.

Args

capacity : int
maximum size of buffer
class ReplayMemory:
    """
    Class representing the replay buffer used for storing experiences for off-policy learning
    """

    def __init__(self, capacity):
        """Initialize a ReplayBuffer object.

        Args:
            capacity (int): maximum size of buffer
        """
        self.capacity = capacity
        self.buffer = [] # each stored experience is a 5-element transition of the form (state, action, next_state, reward, done)
        self.idx = 0

    def store(self, transition):
        """Add a new experience to memory.

        Args:
            transition (array_like): transition of the form (state, action, next_state, reward, done)
        """
        if len(self.buffer) < self.capacity:
            self.buffer.append(transition)
        else:
            self.buffer[self.idx] = transition
        self.idx = (self.idx + 1) % self.capacity # for circular memory


    def sample(self, batchsize, device):
        """
        Randomly sample a batch of experiences from memory.

        Args:
            batch_size (int): Batch size to sample
            device: One of cuda or cpus
        """
        # gather a random batch as an object array so that each column holds one field of the transitions
        transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
        # stack each field across the batch and move the resulting tensors to the target device
        states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
        actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
        next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
        rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
        dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

        return states, actions, next_states, rewards, dones


    def __len__(self):
        """
        Return the current size of internal memory.
        """
        return len(self.buffer)
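
A minimal usage sketch of the class above. The environment-free loop, the state and action dimensions, and the batch size below are hypothetical and only illustrate the expected shapes.

import numpy as np
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
memory = ReplayMemory(capacity=100_000)

state_dim, action_dim = 8, 3  # hypothetical dimensions
state = np.zeros(state_dim, dtype=np.float32)
for _ in range(256):
    # fabricate a transition of the form (state, action, next_state, reward, done)
    action = np.random.uniform(-1, 1, size=action_dim).astype(np.float32)
    next_state = np.random.randn(state_dim).astype(np.float32)
    reward = float(np.random.randn())
    memory.store((state, action, next_state, reward, False))
    state = next_state

# sample a training batch once enough transitions have been collected
if len(memory) >= 64:
    states, actions, next_states, rewards, dones = memory.sample(64, device)
    print(states.shape, actions.shape, rewards.shape)  # torch.Size([64, 8]) torch.Size([64, 3]) torch.Size([64])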

Methods

def sample(self, batchsize, device)

Randomly sample a batch of experiences from memory.

Args

batchsize : int
Number of transitions to sample
device
Torch device to place the sampled tensors on ('cuda' or 'cpu')

Returns

Tuple of tensors (states, actions, next_states, rewards, dones)
def sample(self, batchsize, device):
    """
    Randomly sample a batch of experiences from memory.

    Args:
        batch_size (int): Batch size to sample
        device: One of cuda or cpus
    """
    # gather a random batch as an object array so that each column holds one field of the transitions
    transitions = np.array(random.sample(self.buffer, batchsize), dtype=object)
    # stack each field across the batch and move the resulting tensors to the target device
    states = torch.tensor(np.array(transitions[:, 0].tolist()), dtype=torch.float32).to(device)
    actions = torch.tensor(np.array(transitions[:, 1].tolist()), dtype=torch.float32).to(device)
    next_states = torch.tensor(np.array(transitions[:, 2].tolist()), dtype=torch.float32).to(device)
    rewards = torch.tensor(np.array(transitions[:, 3].tolist()), dtype=torch.float32).to(device)
    dones = torch.tensor(np.array(transitions[:, 4].tolist())).to(device)  # keeps the stored dtype (typically bool)

    return states, actions, next_states, rewards, dones
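
Note that the returned dones tensor keeps the dtype of the stored flags (typically bool), so it usually needs an explicit cast before entering a Bellman target. A small sketch with fabricated tensors (gamma and the Q-values are hypothetical, not produced by this module):

import torch

rewards = torch.tensor([1.0, 0.5, -0.2])
next_q = torch.tensor([2.0, 1.5, 3.0])      # stand-in for the target critic's Q-values
dones = torch.tensor([False, True, False])
gamma = 0.99                                # hypothetical discount factor

# zero out bootstrapping where the episode ended; bool flags are cast to float first
targets = rewards + gamma * (1.0 - dones.float()) * next_q
print(targets)  # tensor([2.9800, 0.5000, 2.7700])
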
def store(self, transition)

Add a new experience to memory.

Args

transition : array_like
Transition of the form (state, action, next_state, reward, done)
def store(self, transition):
    """Add a new experience to memory.

    Args:
        transition (array_like): transition of the form (state, action, next_state, reward, done)
    """
    if len(self.buffer) < self.capacity:
        self.buffer.append(transition)
    else:
        self.buffer[self.idx] = transition
    self.idx = (self.idx + 1) % self.capacity # for circular memory
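
A small sketch (with placeholder values) showing the wrap-around behaviour of store once the buffer is full:

memory = ReplayMemory(capacity=3)
for i in range(5):
    memory.store((f's{i}', f'a{i}', f's{i+1}', float(i), False))

print(len(memory))    # 3 -- the buffer never grows past its capacity
print(memory.idx)     # 2 -- the next write position has wrapped around
print(memory.buffer)  # transitions 3 and 4 have overwritten the two oldest entries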