Topics covered in this session include:
- Integrate forward tester into the RiskOn/RiskOff settings loop
- Evaluation of results
RiskOnOff_loop_fwdTest.py
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 5 11:02:36 2020
@author: Bruce1
"""
# =======================================================================
# Import Libraries
# =======================================================================
import pandas as pd
import pandas_datareader.data as web
from pathlib import Path
import matplotlib.pyplot as plt
#import numpy as np
# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
#import pdb; pdb.set_trace()
savedFile = Path('./{}.xlsx'.format(sym))
if savedFile.exists() == False or refreshData == True:
print("")
print("-> Fetching data from the web")
df = web.DataReader(sym, data_source='yahoo', start=startDate)
print("")
print("-> Save data to file")
df.to_excel("{}.xlsx".format(sym))
else:
print("")
print("-> Fetching data from file")
df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)
# =======================================================================
# Inspect/Report on data
# =======================================================================
firstIndex = df.index.min()
lastIndex = df.index.max()
records = len(df)
print("")
print("-> Importing ", sym)
print("First Date = ", firstIndex)
print("Last Date = ", lastIndex)
print("Total Days = ", records)
if df.isnull().values.any() == True:
print("WARNING: there are {} NaN in the data".format(df.isnull().values.sum()))
print(df.isnull().values)
return df
# =======================================================================
# Gather data function
# =======================================================================
def calcRiskOnRiskOff (symList, cdf, lookback, threshold):
#import pdb; pdb.set_trace()
# =======================================================================
# Calculate Correlation
# =======================================================================
#import pdb; pdb.set_trace()
first = True
for ii in range(len(symList) - 1):
for jj in range(ii + 1, len(symList)):
print ("Comparing {} with {}".format(ii, jj))
xdf = cdf.loc[:,symList[ii]].rolling(lookback). \
corr(cdf.loc[:,symList[jj]])
xdf = xdf.rename('{}_{}'.format(symList[ii],symList[jj]))
#plt.figure()
t = 'Correlation of {} to {} over {} bar lookback'. \
format(symList[ii],symList[jj],lookback )
#xdf.plot(kind='hist', bins=20, title = t)
#plt.show()
if first:
finalDf = xdf.copy()
first = False
else:
finalDf = pd.merge(finalDf, xdf, left_index=True, \
right_index=True, how='inner')
# =======================================================================
# Visualize Data
# =======================================================================
finalDf.loc[:,'total'] = finalDf.sum(axis=1)
t = 'Sum of Correlations between {} over {} bar lookback'.format(symList, lookback)
#plt.figure()
#finalDf.total.plot(kind='hist', bins=20, title=t)
#plt.show()
#plt.figure()
#finalDf.total.plot(kind='line', title=t)
#plt.show()
rdf = pd.merge(cdf.SPY, finalDf, left_index=True, right_index=True, how='inner')
#cdf.to_excel('temp.xlsx')
# Calculation of price direction
rdf.loc[:,'SPYdiff'] = rdf.SPY.diff()
rdf.loc[:,'SPYup'] = rdf.SPYdiff > 0
rdf.loc[:,'thresh'] = rdf.total > threashold
minVal = rdf.SPY.min()
rdf.loc[:,'RiskOn'] = rdf.apply(lambda row: row.SPY if row.thresh & row.SPYup else minVal, axis=1)
rdf.loc[:,'RiskOff'] = rdf.apply(lambda row: row.SPY if row.thresh & ~row.SPYup else minVal, axis=1)
t = 'Risk On / Risk Off : {} lookback {} threshold'.format(lookback, threshold)
plt.figure()
cols = ['SPY','RiskOn', 'RiskOff']
#rdf[cols].plot(kind='line', title=t)
plt.show()
plt.figure()
rdf['2019':'2020'][cols].plot(kind='line', title=t)
plt.show()
return
# =======================================================================
# Function to return Min/Max Return for a date
# =======================================================================
def minMaxReturn(df, startIndex, endIndex):
#import pdb; pdb.set_trace()
sdf = df.iloc[startIndex:endIndex]
minPctRet = -(1 - (sdf.Close.min() / sdf.iloc[0].Close))
maxPctRet = (sdf.Close.max() / sdf.iloc[0].Close) - 1
return minPctRet, maxPctRet
# =======================================================================
# Forward testing function
# =======================================================================
def fwdReturns(df, symList, returnDays):
#import pdb; pdb.set_trace()
resultsList = []
rdfDict = {}
for sym in symList:
df = dfDict[sym]
num = len(df)
for ii in range(0, num - returnDays):
results = {}
results['Date'] = df.index[ii]
results['minRet'], results['maxRet'] = minMaxReturn(df, ii, ii+returnDays)
resultsList.append(results)
#import pdb; pdb.set_trace()
rdf = pd.DataFrame(resultsList)
rdf.set_index('Date', inplace=True)
rdfDict[sym] = rdf
return rdfDict
# =======================================================================
# Gather data function
# =======================================================================
def calcRiskOnRiskOff2 (sym, cdf, lookback, threshold):
#import pdb; pdb.set_trace()
# =======================================================================
# Calculate Correlation
# =======================================================================
#import pdb; pdb.set_trace()
first = True
for ii in range(len(symList) - 1):
for jj in range(ii + 1, len(symList)):
print ("Comparing {} with {}".format(ii, jj))
xdf = cdf.loc[:,symList[ii]].rolling(lookback). \
corr(cdf.loc[:,symList[jj]])
xdf = xdf.rename('{}_{}'.format(symList[ii],symList[jj]))
if first:
finalDf = xdf.copy()
first = False
else:
finalDf = pd.merge(finalDf, xdf, left_index=True, \
right_index=True, how='inner')
#import pdb; pdb.set_trace()
# =======================================================================
# Visualize Data
# =======================================================================
finalDf.loc[:,'total'] = finalDf.sum(axis=1)
#rdf = pd.merge(cdf.SPY, finalDf, left_index=True, right_index=True, how='inner')
rdf = pd.merge(cdf[sym], finalDf, left_index=True, right_index=True, how='inner')
# Calculation of price direction
rdf.loc[:,'diff'] = rdf[sym].diff()
rdf.loc[:,'up'] = rdf['diff'] > 0.0
rdf.loc[:,'thresh'] = rdf.total > threashold
rdf.loc[:,'RiskOn'] = rdf.thresh & rdf.up
rdf.loc[:,'RiskOff'] = rdf.thresh & ~rdf.up
#minVal = rdf.SPY.min()
#rdf.loc[:,'RiskOn'] = rdf.apply(lambda row: row.SPY if row.thresh & row.SPYup else minVal, axis=1)
#rdf.loc[:,'RiskOff'] = rdf.apply(lambda row: row.SPY if row.thresh & ~row.SPYup else minVal, axis=1)
#t = 'Risk On / Risk Off : {} lookback {} threshold'.format(lookback, threshold)
#plt.figure()
#rdf['2019':'2020'][cols].plot(kind='line', title=t)
#plt.show()
return rdf[['RiskOn', 'RiskOff']]
# =======================================================================
# Generate Metrics
# =======================================================================
def metrics (df):
#import pdb; pdb.set_trace()
retAveMax = df.maxRet.mean()
retAveMin = df.minRet.mean()
retRatio = abs(retAveMax/retAveMin)
return retAveMax, retAveMin, retRatio
# =======================================================================
# Setup Porgram Variables
# =======================================================================
symList = ['SPY', 'TLT', 'GLD']
startDate = '01/01/2000'
refreshData = False
lookback = 10
threashold = 1.75
returnDays = 63
dfDict = {}
for sym in symList:
dfDict[sym] = gatherData(sym, startDate)
# =======================================================================
# Combining Data
# =======================================================================
cdf = pd.DataFrame()
first = True
for sym in symList:
tdf = dfDict[sym].copy()
tdf.rename(columns={'Close':sym}, inplace=True)
if first:
cdf = tdf.loc[:,sym]
first = False
else:
cdf = pd.merge(cdf, tdf.loc[:,sym], left_index=True, \
right_index=True, how='inner')
firstIndex = cdf.index.min().date()
lastIndex = cdf.index.max().date()
print ("")
print ("Combined Date Range = {} to {}".format(firstIndex, lastIndex))
print ("Total Trading Days = {}".format(len(cdf)))
#import pdb; pdb.set_trace()
# =======================================================================
# Set Threashold, looback to optimize indicator
# =======================================================================
# 1. Baseline metrics (Forward looking returns from QuickBkTest.py)
# 2. Loop on configuration
# a. Gather the risk on/ risk off dates
# b. Combine the dataframes of the baseline (forward looking results by day)
# with the risk on / risk off date
# c. Collect the metrics
# 1. Baseline
resultsList = []
fwdResultsDict = fwdReturns(cdf, symList, returnDays)
for sym in symList:
results = {}
retAveMax, retAveMin, retRatio = metrics(fwdResultsDict[sym])
results = {'Symbol' : sym,
'Loopback' : 'Baseline',
'Threshold' : 'Baseline',
'RiskOn' : 'Baseline',
'RiskOff' : 'Baseline',
'retAveMax' : retAveMax,
'retAveMin' : retAveMin,
'retRatio' : retRatio,
}
resultsList.append(results)
thresholdList = [1.75, 2.0, 2.25]
lookbackList = [5, 10, 15]
for threshold in thresholdList:
for lookback in lookbackList:
for sym in symList:
riskDf = calcRiskOnRiskOff2(sym, cdf, lookback, threshold)
#import pdb; pdb.set_trace()
df = pd.merge(riskDf, fwdResultsDict[sym], left_index=True, \
right_index=True, how='inner')
# make a loop for RiskOn and RiskOff
for risk in ['RiskOn', 'RiskOff']:
result = {}
sdf = df[df[risk] == True].copy()
retAveMax, retAveMin, retRatio = metrics(sdf)
result = {'Symbol' : sym,
'Loopback' : lookback,
'Threshold' : threshold,
'RiskOn' : risk == 'RiskOn',
'RiskOff' : risk == 'RiskOff',
'retAveMax' : retAveMax,
'retAveMin' : retAveMin,
'retRatio' : retRatio,
}
resultsList.append(result)
import pdb; pdb.set_trace()
metricsDf = pd.DataFrame(resultsList)
metricsDf.to_excel('results.xlsx')