Topics covered in this session include:
- Working around diff() function not working
- Formulas for Streak Calculation
Trendiness.py
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 07:51:15 2020
@author: Bruce1
"""
#=======================================================================
# Import Libraries
# =======================================================================
import pandas as pd
import pandas_datareader.data as web
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
#import pdb; pdb.set_trace()
savedFile = Path('./{}.xlsx'.format(sym))
if savedFile.exists() == False or refreshData == True:
print("")
print("-> Fetching data from the web")
df = web.DataReader(sym, data_source='yahoo', start=startDate)
print("")
print("-> Save data to file")
df.to_excel("{}.xlsx".format(sym))
else:
print("")
print("-> Fetching data from file")
df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)
# =======================================================================
# Inspect/Report on data
# =======================================================================
firstIndex = df.index.min()
lastIndex = df.index.max()
records = len(df)
print("")
print("-> Importing ", sym)
print("First Date = ", firstIndex)
print("Last Date = ", lastIndex)
print("Total Days = ", records)
if df.isnull().values.any() == True:
print("WARNING: there are {} NaN in the data".format(df.isnull().values.sum()))
print(df.isnull().values)
return df
# =======================================================================
# Setup Porgram Variables
# =======================================================================
symList = ['SPY', 'TLT', 'GLD']
startDate = '01/01/2000'
refreshData = False
lookback = 50
dfDict = {}
for sym in symList:
dfDict[sym] = gatherData(sym, startDate)
# =======================================================================
# Daily Analysis for Trendiness
# - For this program, a trend is closes above or below a simple moving
# average. Trend length is defined as a streak, concecutive closes
# either above or below.
# =======================================================================
#import pdb; pdb.set_trace()
for sym in symList:
df = dfDict[sym]
df.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)
df.loc[:,'SMA'] = df.AdjClose.rolling(lookback).mean()
df.loc[:,'Above'] = df.AdjClose > df.SMA
df.loc[:,'Below'] = df.AdjClose < df.SMA
#df.loc[:,'AbvMeta'] = df.Above.groupby(df.Above.diff().cumsum()).cumcount()
x = df.Above - df.Above.shift()
x = x.astype(bool)
x = x.cumsum()
df.loc[:,'AbvMeta'] = df.Above.groupby(x).cumcount()
df.loc[:,'NextAbove'] = df.Above.shift(-1)
df.loc[:,'AbvStreak'] = df.apply(lambda x: x.AbvMeta if (
(x.Above == True) & (x.NextAbove == False)) else 0, axis=1)
x = df.Below - df.Below.shift()
x = x.astype(bool)
x = x.cumsum()
df.loc[:,'BlwMeta'] = df.Below.groupby(x).cumcount()
df.loc[:,'NextBelow'] = df.Below.shift(-1)
df.loc[:,'BlwStreak'] = df.apply(lambda x: -x.AbvMeta if (
(x.Below == True) & (x.NextBelow == False)) else 0, axis=1)
#import pdb; pdb.set_trace()
df.loc[:,'Streak'] = df.apply(lambda x: x.AbvStreak if x.AbvStreak != 0
else np.NaN, axis=1)
df.loc[:,'Streak'] = df.apply(lambda x: x.BlwStreak if x.BlwStreak != 0
else x.Streak, axis=1)
t='{}'.format(sym)
df.Streak.plot(kind='hist',bins=25,title=t)
plt.show()