Module AssetAllocator.trainer

Expand source code
import os
import pickle
import sys

if '__file__' in vars():
    # print("We are running the script non interactively")
    path = os.path.join(os.path.dirname(__file__), os.pardir)
    sys.path.append(path)    
else:
    # print('We are running the script interactively')
    sys.path.append("..")

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from .environments.PortfolioGym import PortfolioManagementGym as PMG
from .environments.utils import *

def load_stock_data(filepath):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    data = pd.read_csv(dir_path + '/' + filepath, index_col = 'Date')
    for col in data.columns:
       data[col] = data[col].astype(float)
    
    idx = int(len(data) * 0.7)
    train_data = data.iloc[:idx]
    test_data = data.iloc[idx:]
    #print(len(train_data), len(test_data))
    return train_data, test_data

def evaluate(test_env, model, test_length):
    agent_rwds = []
    obs = test_env.reset()
    dones = False

    while not dones:
        action = model.predict(obs)
        obs, rewards, dones, info = test_env.step(action)
        agent_rwds  += [test_env.render()]

    #simple_returns = [agent_rwd[0] for agent_rwd in agent_rwds]
    rwds = agent_rwds[-test_length:]
    #print(len(rwds))
    return rwds

class Trainer:
    def __init__(self, 
                 filepath, 
                 experiment_name, 
                 timesteps, 
                 print_every,
                 episode_length = None,
                 returns = True,
                 trading_cost_ratio = 0.001,
                 lookback_period = 64,
                 initial_investment = 1_000_000,
                 retain_cash = True,
                 random_start_range = 20,
                 dsr_constant = 1e-4,
                 add_softmax = False,
                 start_date = '2009-01-01',
                 end_date = '2022-01-01',
                 seed = 0,
                 test_length = 550,
                 test_runs = 1):
        
        self.filepath = filepath
        self.experiment_name = experiment_name
        self.timesteps = timesteps
        self.print_every = print_every
        self.episode_length = episode_length 
        self.returns = returns
        self.trading_cost_ratio = trading_cost_ratio
        self.lookback_period = lookback_period
        self.initial_investment = initial_investment
        self.retain_cash = retain_cash
        self.random_start_range = random_start_range
        self.dsr_constant = dsr_constant
        self.add_softmax = add_softmax
        self.start_date = start_date
        self.end_date = end_date
        self.seed = seed
        self.test_length = test_length
        self.test_runs = test_runs
        
    def get_train_env(self):
        self.train_data, self.test_data = load_stock_data(self.filepath)

        self.train_env = PMG(
                             data = self.train_data,
                             episode_length = len(self.train_data),
                             returns = self.returns,
                             trading_cost_ratio = self.trading_cost_ratio,
                             lookback_period = self.lookback_period,
                             initial_investment = self.initial_investment,
                             retain_cash = self.retain_cash,
                             random_start_range = self.random_start_range,
                             dsr_constant = self.dsr_constant,
                             add_softmax = self.add_softmax,
                             start_date = self.start_date,
                             end_date = self.end_date,
                             seed = self.seed)
        return self.train_env
        
    def run(self, model, train_mode = True):
        results = []
        
        if train_mode:
            try:
                model.learn(timesteps = self.timesteps, print_every = self.print_every)
            except TypeError:
                model.learn(total_timesteps = self.timesteps, log_interval = self.print_every)


        # Run the test phase for different phase
        for seed in tqdm(range(self.test_runs)):

            test_env =  PMG(
                             data = self.test_data,
                             episode_length = len(self.test_data),
                             returns = self.returns,
                             trading_cost_ratio = self.trading_cost_ratio,
                             lookback_period = self.lookback_period,
                             initial_investment = self.initial_investment,
                             retain_cash = self.retain_cash,
                             random_start_range = 0,
                             dsr_constant = self.dsr_constant,
                             add_softmax = self.add_softmax,
                             start_date = self.start_date,
                             end_date = self.end_date,
                             seed = self.seed
                            )
            
            results += [evaluate(test_env, model, self.test_length)]

        # Save the results for each analysis into different files
        with open(f'{self.experiment_name}.pkl', 'wb') as f:
            pickle.dump(results, f)

        with open(f'{self.experiment_name}_portfolio.pkl', 'wb') as f:
            pickle.dump(test_env.weights, f)
    
        return results
        
        
        

Functions

def evaluate(test_env, model, test_length)
Expand source code
def evaluate(test_env, model, test_length):
    agent_rwds = []
    obs = test_env.reset()
    dones = False

    while not dones:
        action = model.predict(obs)
        obs, rewards, dones, info = test_env.step(action)
        agent_rwds  += [test_env.render()]

    #simple_returns = [agent_rwd[0] for agent_rwd in agent_rwds]
    rwds = agent_rwds[-test_length:]
    #print(len(rwds))
    return rwds
def load_stock_data(filepath)
Expand source code
def load_stock_data(filepath):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    data = pd.read_csv(dir_path + '/' + filepath, index_col = 'Date')
    for col in data.columns:
       data[col] = data[col].astype(float)
    
    idx = int(len(data) * 0.7)
    train_data = data.iloc[:idx]
    test_data = data.iloc[idx:]
    #print(len(train_data), len(test_data))
    return train_data, test_data

Classes

class Trainer (filepath, experiment_name, timesteps, print_every, episode_length=None, returns=True, trading_cost_ratio=0.001, lookback_period=64, initial_investment=1000000, retain_cash=True, random_start_range=20, dsr_constant=0.0001, add_softmax=False, start_date='2009-01-01', end_date='2022-01-01', seed=0, test_length=550, test_runs=1)
Expand source code
class Trainer:
    def __init__(self, 
                 filepath, 
                 experiment_name, 
                 timesteps, 
                 print_every,
                 episode_length = None,
                 returns = True,
                 trading_cost_ratio = 0.001,
                 lookback_period = 64,
                 initial_investment = 1_000_000,
                 retain_cash = True,
                 random_start_range = 20,
                 dsr_constant = 1e-4,
                 add_softmax = False,
                 start_date = '2009-01-01',
                 end_date = '2022-01-01',
                 seed = 0,
                 test_length = 550,
                 test_runs = 1):
        
        self.filepath = filepath
        self.experiment_name = experiment_name
        self.timesteps = timesteps
        self.print_every = print_every
        self.episode_length = episode_length 
        self.returns = returns
        self.trading_cost_ratio = trading_cost_ratio
        self.lookback_period = lookback_period
        self.initial_investment = initial_investment
        self.retain_cash = retain_cash
        self.random_start_range = random_start_range
        self.dsr_constant = dsr_constant
        self.add_softmax = add_softmax
        self.start_date = start_date
        self.end_date = end_date
        self.seed = seed
        self.test_length = test_length
        self.test_runs = test_runs
        
    def get_train_env(self):
        self.train_data, self.test_data = load_stock_data(self.filepath)

        self.train_env = PMG(
                             data = self.train_data,
                             episode_length = len(self.train_data),
                             returns = self.returns,
                             trading_cost_ratio = self.trading_cost_ratio,
                             lookback_period = self.lookback_period,
                             initial_investment = self.initial_investment,
                             retain_cash = self.retain_cash,
                             random_start_range = self.random_start_range,
                             dsr_constant = self.dsr_constant,
                             add_softmax = self.add_softmax,
                             start_date = self.start_date,
                             end_date = self.end_date,
                             seed = self.seed)
        return self.train_env
        
    def run(self, model, train_mode = True):
        results = []
        
        if train_mode:
            try:
                model.learn(timesteps = self.timesteps, print_every = self.print_every)
            except TypeError:
                model.learn(total_timesteps = self.timesteps, log_interval = self.print_every)


        # Run the test phase for different phase
        for seed in tqdm(range(self.test_runs)):

            test_env =  PMG(
                             data = self.test_data,
                             episode_length = len(self.test_data),
                             returns = self.returns,
                             trading_cost_ratio = self.trading_cost_ratio,
                             lookback_period = self.lookback_period,
                             initial_investment = self.initial_investment,
                             retain_cash = self.retain_cash,
                             random_start_range = 0,
                             dsr_constant = self.dsr_constant,
                             add_softmax = self.add_softmax,
                             start_date = self.start_date,
                             end_date = self.end_date,
                             seed = self.seed
                            )
            
            results += [evaluate(test_env, model, self.test_length)]

        # Save the results for each analysis into different files
        with open(f'{self.experiment_name}.pkl', 'wb') as f:
            pickle.dump(results, f)

        with open(f'{self.experiment_name}_portfolio.pkl', 'wb') as f:
            pickle.dump(test_env.weights, f)
    
        return results

Methods

def get_train_env(self)
Expand source code
def get_train_env(self):
    self.train_data, self.test_data = load_stock_data(self.filepath)

    self.train_env = PMG(
                         data = self.train_data,
                         episode_length = len(self.train_data),
                         returns = self.returns,
                         trading_cost_ratio = self.trading_cost_ratio,
                         lookback_period = self.lookback_period,
                         initial_investment = self.initial_investment,
                         retain_cash = self.retain_cash,
                         random_start_range = self.random_start_range,
                         dsr_constant = self.dsr_constant,
                         add_softmax = self.add_softmax,
                         start_date = self.start_date,
                         end_date = self.end_date,
                         seed = self.seed)
    return self.train_env
def run(self, model, train_mode=True)
Expand source code
def run(self, model, train_mode = True):
    results = []
    
    if train_mode:
        try:
            model.learn(timesteps = self.timesteps, print_every = self.print_every)
        except TypeError:
            model.learn(total_timesteps = self.timesteps, log_interval = self.print_every)


    # Run the test phase for different phase
    for seed in tqdm(range(self.test_runs)):

        test_env =  PMG(
                         data = self.test_data,
                         episode_length = len(self.test_data),
                         returns = self.returns,
                         trading_cost_ratio = self.trading_cost_ratio,
                         lookback_period = self.lookback_period,
                         initial_investment = self.initial_investment,
                         retain_cash = self.retain_cash,
                         random_start_range = 0,
                         dsr_constant = self.dsr_constant,
                         add_softmax = self.add_softmax,
                         start_date = self.start_date,
                         end_date = self.end_date,
                         seed = self.seed
                        )
        
        results += [evaluate(test_env, model, self.test_length)]

    # Save the results for each analysis into different files
    with open(f'{self.experiment_name}.pkl', 'wb') as f:
        pickle.dump(results, f)

    with open(f'{self.experiment_name}_portfolio.pkl', 'wb') as f:
        pickle.dump(test_env.weights, f)

    return results