May 14, 2020 Python Q&A

Topics covered in this session include:

  • Added metrics to forward window testing
  • Visualize realized forward window testing with histograms and metrics in title
  • Began modifications to the risk on/off tool characterization loop program.

QuickBkTest.py

# -*- coding: utf-8 -*-
"""
Created on Thu May  7 07:49:44 2020

@author: Bruce1
"""
# =======================================================================
# Import Libraries
# =======================================================================
import pandas as pd 
import pandas_datareader.data as web
from   pathlib import Path
import matplotlib.pyplot as plt
#import numpy as np

# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
    """Load daily price data for ``sym``, using a local Excel file as a cache.

    The data is downloaded from Yahoo (via ``web.DataReader``) when no
    ``./<sym>.xlsx`` file exists or when the module-level ``refreshData``
    flag is set; the download is then written to that file.  Otherwise the
    previously saved file is read back with ``Date`` as a parsed index.

    A short summary (first/last index value and row count) is printed, plus
    a warning and the boolean NaN mask if the frame contains any NaN.

    Args:
        sym: Ticker symbol; also used as the base name of the cache file.
        startDate: Passed to ``web.DataReader`` as ``start``.  Only used
            when the data is fetched from the web.

    Returns:
        The ``DataFrame`` that was downloaded or read from the cache file.
    """
    #import pdb; pdb.set_trace()

    savedFile = Path('./{}.xlsx'.format(sym))

    # refreshData is a module-level switch defined outside this function.
    if not savedFile.exists() or refreshData:
        print("")
        print("-> Fetching data from the web")
        df = web.DataReader(sym, data_source='yahoo', start=startDate)

        print("")
        print("-> Save data to file")
        # Write to the same path that the existence check above looks at.
        df.to_excel(savedFile)

    else:
        print("")
        print("-> Fetching data from file")
        df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)

    # =======================================================================
    # Inspect/Report on data
    # =======================================================================
    firstIndex = df.index.min()
    lastIndex  = df.index.max()
    records = len(df)
    print("")
    print("-> Importing ", sym)
    print("First Date = ", firstIndex)
    print("Last Date  = ", lastIndex)
    print("Total Days = ", records)

    # Build the NaN mask once and reuse it for the test, count and dump.
    nanMask = df.isnull().values
    if nanMask.any():
        print("WARNING: there are {} NaN in the data".format(nanMask.sum()))
        print(nanMask)

    return df

# =======================================================================
# Set up Program Variables
# =======================================================================
# Symbols to download and analyse.
symList = ['SPY', 'TLT', 'GLD']

# Earliest date requested when data is fetched from the web.
startDate = '01/01/2000'

# Number of rows in each forward window handed to minMaxReturn.
returnDays = 63

# When True, gatherData re-downloads even if a saved file already exists.
refreshData = False

# NOTE(review): lookback and threashold are not referenced in the visible
# part of this script; presumably used by later work -- confirm.
lookback = 10
threashold = 1.75

# One price DataFrame per symbol, keyed by symbol.
dfDict = {sym: gatherData(sym, startDate) for sym in symList}




# =======================================================================
#  Function to return Min/Max Return over a window of rows
# =======================================================================
def minMaxReturn(df, startIndex, endIndex):
    """Return the lowest and highest ``Close`` move inside a row window.

    The window is ``df.iloc[startIndex:endIndex]`` (end exclusive).  Both
    results are fractional changes measured against the ``Close`` of the
    window's first row.

    Args:
        df: DataFrame with a ``Close`` column.
        startIndex: Positional index of the first row in the window.
        endIndex: Positional index one past the last row in the window.

    Returns:
        ``(minPctRet, maxPctRet)``: the window's minimum and maximum
        ``Close``, each expressed as a fraction of the first ``Close``
        minus one.

    Raises:
        IndexError: If the window contains no rows.
    """
    window = df.iloc[startIndex:endIndex]

    # Every return is measured against the first close in the window.
    basePrice = window.iloc[0].Close

    lowRatio = window.Close.min() / basePrice
    highRatio = window.Close.max() / basePrice

    return -(1 - lowRatio), highRatio - 1

# =======================================================================
#  Loop through dataframes collecting daily min/max returns
# =======================================================================
# resultsList holds the rows of the symbol currently being processed;
# rdfDict maps each symbol to its finished results DataFrame.
resultsList = []
rdfDict = {}

for sym in symList:

    df = dfDict[sym]
    num = len(df)

    # Start from an empty list for every symbol.  Re-using one list across
    # symbols would append each symbol's rows to those of the symbols
    # processed before it.
    resultsList = []

    # One row per start position that still has returnDays rows after it.
    for ii in range(0, num - returnDays):

        minRet, maxRet = minMaxReturn(df, ii, ii + returnDays)

        resultsList.append({'Date': df.index[ii],
                            'minRet': minRet,
                            'maxRet': maxRet})

    #import pdb; pdb.set_trace()
    # Explicit columns keep the frame well-formed (and 'Date' present) even
    # when a symbol has too few rows to produce any window.
    rdf = pd.DataFrame(resultsList, columns=['Date', 'minRet', 'maxRet'])

    # set_index returns a new frame; the result must be kept for the index
    # to actually change.
    rdf = rdf.set_index('Date')

    rdfDict[sym] = rdf

# =======================================================================
#  Visualize our Data
# =======================================================================
        
# Debugger breakpoint left disabled, like the other breakpoints in this
# file, so the script runs through to the plots without stopping.
#import pdb; pdb.set_trace()

# One histogram figure per symbol, overlaying the minRet and maxRet
# distributions, with summary metrics in the title.
for sym in symList:

    rdf = rdfDict[sym]

    # Average of the per-window max and min returns, and the magnitude of
    # their ratio.
    retAveMax = rdf.maxRet.mean()
    retAveMin = rdf.minRet.mean()
    retRatio  = abs(retAveMax/retAveMin)

    t='{} {} day Forward Returns {:.2f} Return Ratio \n AveMin = {:.3f} AveMax = {:.3f}' \
            .format(sym, returnDays, retRatio, retAveMin, retAveMax)

    rdf[['minRet', 'maxRet']].plot(kind='hist', bins=40, title=t, color=['DarkBlue','DarkGreen'])
    plt.show()