April 23, 2020 Python Q&A

Topics covered in this session include:

  • Continued development on the Risk on / Risk off tool
  • Introduction and application of the <dataframe>.apply() function
  • Visualization of filter and equity curve together

RiskOnOff.py

# -*- coding: utf-8 -*-
"""
Created on Sun Jan  5 11:02:36 2020

@author: Bruce1
"""

# =======================================================================
# Import Libraries
# =======================================================================
import pandas as pd 
import pandas_datareader.data as web
from   pathlib import Path
import matplotlib.pyplot as plt
#import numpy as np

# =======================================================================
# Setup Porgram Variables
# =======================================================================
symList = ['SPY', 'TLT', 'GLD']

startDate = '01/01/2000'

refreshData = False

lookback = 10

threashold = 1.75

# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
    #import pdb; pdb.set_trace()

    savedFile = Path('./{}.xlsx'.format(sym))
    
    if savedFile.exists() == False or refreshData == True:
        print("")
        print("-> Fetching data from the web")
        df = web.DataReader(sym, data_source='yahoo', start=startDate)
    
        print("")
        print("-> Save data to file")  
        df.to_excel("{}.xlsx".format(sym))

    else:
        print("")
        print("-> Fetching data from file") 
        df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)

    # =======================================================================
    # Inspect/Report on data
    # =======================================================================
    firstIndex = df.index.min()
    lastIndex  = df.index.max()
    records = len(df)
    print("")
    print("-> Importing ", sym)
    print("First Date = ", firstIndex)
    print("Last Date  = ", lastIndex)
    print("Total Days = ", records)

    if df.isnull().values.any() == True:
        print("WARNING: there are {} NaN in the data".format(df.isnull().values.sum()))
        print(df.isnull().values)
        
    return df

dfDict = {}

for sym in symList:
    
    dfDict[sym] = gatherData(sym, startDate)


# =======================================================================
# Combining Data
# =======================================================================
cdf = pd.DataFrame() 
first = True

for sym in symList:
    
    tdf = dfDict[sym].copy()
    tdf.rename(columns={'Close':sym}, inplace=True)
    
    if first:
        cdf = tdf.loc[:,sym]
        first = False
        
    else:
        cdf = pd.merge(cdf, tdf.loc[:,sym], left_index=True, \
                       right_index=True,  how='inner')
    
#import pdb; pdb.set_trace()

firstIndex = cdf.index.min().date()
lastIndex  = cdf.index.max().date()

print ("")
print ("Combined Date Range = {} to {}".format(firstIndex, lastIndex))
print ("Total Trading Days = {}".format(len(cdf)))

# =======================================================================
# Calculate Correlation
# =======================================================================
#import pdb; pdb.set_trace()
 
first = True
   
for ii in range(len(symList) - 1):
    
    for jj in range(ii + 1, len(symList)):
        
        print ("Comparing {} with {}".format(ii, jj))
        
        xdf = cdf.loc[:,symList[ii]].rolling(lookback). \
                       corr(cdf.loc[:,symList[jj]])
                       
        xdf = xdf.rename('{}_{}'.format(symList[ii],symList[jj]))
        
        plt.figure()
        t = 'Correlation of {} to {} over {} bar lookback'. \
                   format(symList[ii],symList[jj],lookback )
        xdf.plot(kind='hist', bins=20, title = t)
        plt.show()
        
        if first:
            finalDf = xdf.copy()
            first = False
        else:
            finalDf = pd.merge(finalDf, xdf, left_index=True, \
                               right_index=True, how='inner')


# =======================================================================
# Visualize Data
# =======================================================================
finalDf.loc[:,'total'] = finalDf.sum(axis=1)

t = 'Sum of Correlations between {} over {} bar lookback'.format(symList, lookback)
plt.figure()
finalDf.total.plot(kind='hist', bins=20, title=t)
plt.show()

plt.figure()
finalDf.total.plot(kind='line', title=t)
plt.show()


rdf = pd.merge(cdf.SPY, finalDf, left_index=True, right_index=True, how='inner')
cdf.to_excel('temp.xlsx')

rdf.loc[:,'thresh'] = rdf.total > threashold

minVal = rdf.SPY.min()
rdf.loc[:,'filterSig'] = rdf.apply(lambda row: row.SPY if row.thresh else minVal, axis=1)

plt.figure()
cols = ['SPY','filterSig']
rdf[cols].plot(kind='line', title=t)
plt.show()

plt.figure()
rdf['2018':'2020'][cols].plot(kind='line', title=t)
plt.show()

import pdb; pdb.set_trace()

Scroll to Top