May 28, 2020 Python Q&A

Topics covered in this session include:

  • Integrate forward tester into the RiskOn/RiskOff settings loop
  • Evaluation of results

RiskOnOff_loop_fwdTest.py

# -*- coding: utf-8 -*-
"""
Created on Sun Jan  5 11:02:36 2020

@author: Bruce1
"""

# =======================================================================
# Import Libraries
# =======================================================================
import pandas as pd 
import pandas_datareader.data as web
from   pathlib import Path
import matplotlib.pyplot as plt
#import numpy as np

# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
    #import pdb; pdb.set_trace()

    savedFile = Path('./{}.xlsx'.format(sym))
    
    if savedFile.exists() == False or refreshData == True:
        print("")
        print("-> Fetching data from the web")
        df = web.DataReader(sym, data_source='yahoo', start=startDate)
    
        print("")
        print("-> Save data to file")  
        df.to_excel("{}.xlsx".format(sym))

    else:
        print("")
        print("-> Fetching data from file") 
        df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)

    # =======================================================================
    # Inspect/Report on data
    # =======================================================================
    firstIndex = df.index.min()
    lastIndex  = df.index.max()
    records = len(df)
    print("")
    print("-> Importing ", sym)
    print("First Date = ", firstIndex)
    print("Last Date  = ", lastIndex)
    print("Total Days = ", records)

    if df.isnull().values.any() == True:
        print("WARNING: there are {} NaN in the data".format(df.isnull().values.sum()))
        print(df.isnull().values)
        
    return df


# =======================================================================
# Gather data function
# =======================================================================
def calcRiskOnRiskOff (symList, cdf, lookback, threshold):
    #import pdb; pdb.set_trace()

    # =======================================================================
    # Calculate Correlation
    # =======================================================================
    #import pdb; pdb.set_trace()
     
    first = True
       
    for ii in range(len(symList) - 1):
        
        for jj in range(ii + 1, len(symList)):
            
            print ("Comparing {} with {}".format(ii, jj))
            
            xdf = cdf.loc[:,symList[ii]].rolling(lookback). \
                           corr(cdf.loc[:,symList[jj]])
                           
            xdf = xdf.rename('{}_{}'.format(symList[ii],symList[jj]))
            
            #plt.figure()
            t = 'Correlation of {} to {} over {} bar lookback'. \
                       format(symList[ii],symList[jj],lookback )
            #xdf.plot(kind='hist', bins=20, title = t)
            #plt.show()
            
            if first:
                finalDf = xdf.copy()
                first = False
            else:
                finalDf = pd.merge(finalDf, xdf, left_index=True, \
                                   right_index=True, how='inner')
    
    
    # =======================================================================
    # Visualize Data
    # =======================================================================
    finalDf.loc[:,'total'] = finalDf.sum(axis=1)
    
    t = 'Sum of Correlations between {} over {} bar lookback'.format(symList, lookback)
    #plt.figure()
    #finalDf.total.plot(kind='hist', bins=20, title=t)
    #plt.show()
    
    #plt.figure()
    #finalDf.total.plot(kind='line', title=t)
    #plt.show()
    
    
    rdf = pd.merge(cdf.SPY, finalDf, left_index=True, right_index=True, how='inner')
    #cdf.to_excel('temp.xlsx')
    
    # Calculation of price direction

    rdf.loc[:,'SPYdiff'] = rdf.SPY.diff()
    rdf.loc[:,'SPYup'] = rdf.SPYdiff > 0
    
    rdf.loc[:,'thresh'] = rdf.total > threashold
    
    minVal = rdf.SPY.min()

    rdf.loc[:,'RiskOn']  = rdf.apply(lambda row: row.SPY if row.thresh &  row.SPYup else minVal, axis=1)
    rdf.loc[:,'RiskOff'] = rdf.apply(lambda row: row.SPY if row.thresh & ~row.SPYup else minVal, axis=1)
    
    t = 'Risk On / Risk Off : {} lookback {} threshold'.format(lookback, threshold)
    
    plt.figure()
    cols = ['SPY','RiskOn', 'RiskOff']
    #rdf[cols].plot(kind='line', title=t)
    plt.show()
    
    plt.figure()
    rdf['2019':'2020'][cols].plot(kind='line', title=t)
    plt.show()
    
    return


 # =======================================================================
#  Function to return Min/Max Return for a date
# =======================================================================   
def minMaxReturn(df, startIndex, endIndex):
    #import pdb; pdb.set_trace()
    
    sdf = df.iloc[startIndex:endIndex]
    
    minPctRet = -(1 - (sdf.Close.min() / sdf.iloc[0].Close))
    maxPctRet = (sdf.Close.max() / sdf.iloc[0].Close) - 1
    
    return minPctRet, maxPctRet


# =======================================================================
# Forward testing function
# =======================================================================
def fwdReturns(df, symList, returnDays):
    #import pdb; pdb.set_trace()
    resultsList = []
    rdfDict = {}
    
    for sym in symList:
        
        df = dfDict[sym]
        num = len(df)
        
        for ii in range(0, num - returnDays):
            
            results = {}
            
            results['Date'] = df.index[ii]
            results['minRet'], results['maxRet'] = minMaxReturn(df, ii, ii+returnDays)
          
            resultsList.append(results)
            
        #import pdb; pdb.set_trace()
        rdf = pd.DataFrame(resultsList)
        rdf.set_index('Date', inplace=True)
        
        rdfDict[sym] = rdf

    return rdfDict


    
# =======================================================================
# Gather data function
# =======================================================================
def calcRiskOnRiskOff2 (sym, cdf, lookback, threshold):
    #import pdb; pdb.set_trace()

    # =======================================================================
    # Calculate Correlation
    # =======================================================================
    #import pdb; pdb.set_trace()
     
    first = True
       
    for ii in range(len(symList) - 1):
        
        for jj in range(ii + 1, len(symList)):
            
            print ("Comparing {} with {}".format(ii, jj))
            
            xdf = cdf.loc[:,symList[ii]].rolling(lookback). \
                           corr(cdf.loc[:,symList[jj]])
                           
            xdf = xdf.rename('{}_{}'.format(symList[ii],symList[jj]))
            

            
            if first:
                finalDf = xdf.copy()
                first = False
            else:
                finalDf = pd.merge(finalDf, xdf, left_index=True, \
                                   right_index=True, how='inner')
    
    #import pdb; pdb.set_trace()
    # =======================================================================
    # Visualize Data
    # =======================================================================
    finalDf.loc[:,'total'] = finalDf.sum(axis=1)
    
    
    #rdf = pd.merge(cdf.SPY, finalDf, left_index=True, right_index=True, how='inner')
    rdf = pd.merge(cdf[sym], finalDf, left_index=True, right_index=True, how='inner')

    # Calculation of price direction

    rdf.loc[:,'diff'] = rdf[sym].diff()
    rdf.loc[:,'up']   = rdf['diff'] > 0.0
    
    rdf.loc[:,'thresh'] = rdf.total > threashold
    
    rdf.loc[:,'RiskOn']  = rdf.thresh &  rdf.up
    rdf.loc[:,'RiskOff'] = rdf.thresh & ~rdf.up
    
    #minVal = rdf.SPY.min()

    #rdf.loc[:,'RiskOn']  = rdf.apply(lambda row: row.SPY if row.thresh &  row.SPYup else minVal, axis=1)
    #rdf.loc[:,'RiskOff'] = rdf.apply(lambda row: row.SPY if row.thresh & ~row.SPYup else minVal, axis=1)
    
    #t = 'Risk On / Risk Off : {} lookback {} threshold'.format(lookback, threshold)
    
    #plt.figure()
    #rdf['2019':'2020'][cols].plot(kind='line', title=t)
    #plt.show()
    
    return rdf[['RiskOn', 'RiskOff']]


# =======================================================================
# Generate Metrics
# =======================================================================
def metrics (df):
    #import pdb; pdb.set_trace()
    
    retAveMax = df.maxRet.mean()
    retAveMin = df.minRet.mean()
    retRatio  = abs(retAveMax/retAveMin)
    
    return retAveMax, retAveMin, retRatio
    
# =======================================================================
# Setup Porgram Variables
# =======================================================================
symList = ['SPY', 'TLT', 'GLD']

startDate = '01/01/2000'

refreshData = False

lookback = 10

threashold = 1.75

returnDays = 63

dfDict = {}

for sym in symList:
    
    dfDict[sym] = gatherData(sym, startDate)


# =======================================================================
# Combining Data
# =======================================================================
cdf = pd.DataFrame() 
first = True

for sym in symList:
    
    tdf = dfDict[sym].copy()
    tdf.rename(columns={'Close':sym}, inplace=True)
    
    if first:
        cdf = tdf.loc[:,sym]
        first = False
        
    else:
        cdf = pd.merge(cdf, tdf.loc[:,sym], left_index=True, \
                       right_index=True,  how='inner')

firstIndex = cdf.index.min().date()
lastIndex  = cdf.index.max().date()

print ("")
print ("Combined Date Range = {} to {}".format(firstIndex, lastIndex))
print ("Total Trading Days = {}".format(len(cdf)))



#import pdb; pdb.set_trace()

# =======================================================================
# Set Threashold, looback to optimize indicator
# =======================================================================
# 1. Baseline metrics (Forward looking returns from QuickBkTest.py)
# 2. Loop on configuration
#    a. Gather the risk on/ risk off dates
#    b. Combine the dataframes of the baseline (forward looking results by day)
#       with the risk on / risk off date
#    c. Collect the metrics

# 1. Baseline



resultsList = []

fwdResultsDict = fwdReturns(cdf, symList, returnDays)

for sym in symList:
    
    results = {}
    
    retAveMax, retAveMin, retRatio = metrics(fwdResultsDict[sym])
    
    results = {'Symbol'    : sym,
               'Loopback'  : 'Baseline',
               'Threshold' : 'Baseline',
               'RiskOn'    : 'Baseline',
               'RiskOff'   : 'Baseline',
               'retAveMax' : retAveMax,
               'retAveMin' : retAveMin,
               'retRatio'  : retRatio,
               }


    resultsList.append(results)



thresholdList = [1.75, 2.0, 2.25]
lookbackList  = [5, 10, 15]

for threshold in thresholdList:
    
    for lookback in lookbackList:
        
        for sym in symList:
            
            riskDf = calcRiskOnRiskOff2(sym, cdf, lookback, threshold)
            
            #import pdb; pdb.set_trace()
            
            df = pd.merge(riskDf, fwdResultsDict[sym], left_index=True, \
                          right_index=True, how='inner')
            
            # make a loop for RiskOn and RiskOff
            for risk in ['RiskOn', 'RiskOff']:
        
                result = {}
                
                sdf = df[df[risk] == True].copy()
                
                retAveMax, retAveMin, retRatio = metrics(sdf)
    
                result = {'Symbol'    : sym,
                           'Loopback'  : lookback,
                           'Threshold' : threshold,
                           'RiskOn'    : risk == 'RiskOn',
                           'RiskOff'   : risk == 'RiskOff',
                           'retAveMax' : retAveMax,
                           'retAveMin' : retAveMin,
                           'retRatio'  : retRatio,
                           }
            
            
                resultsList.append(result)
            
import pdb; pdb.set_trace()
metricsDf = pd.DataFrame(resultsList)
metricsDf.to_excel('results.xlsx')