June 18, 2020 Python Q&A

Topics covered in this session include:

  • Working around diff() function not working
  • Formulas for Streak Calculation

Trendiness.py

# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 07:51:15 2020

@author: Bruce1
"""

#=======================================================================
# Import Libraries
# =======================================================================
import pandas as pd 
import pandas_datareader.data as web
from   pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

# =======================================================================
# Gather data function
# =======================================================================
def gatherData(sym, startDate):
    #import pdb; pdb.set_trace()

    savedFile = Path('./{}.xlsx'.format(sym))
    
    if savedFile.exists() == False or refreshData == True:
        print("")
        print("-> Fetching data from the web")
        df = web.DataReader(sym, data_source='yahoo', start=startDate)
    
        print("")
        print("-> Save data to file")  
        df.to_excel("{}.xlsx".format(sym))

    else:
        print("")
        print("-> Fetching data from file") 
        df = pd.read_excel(savedFile, index_col='Date', parse_dates=True)

    # =======================================================================
    # Inspect/Report on data
    # =======================================================================
    firstIndex = df.index.min()
    lastIndex  = df.index.max()
    records = len(df)
    print("")
    print("-> Importing ", sym)
    print("First Date = ", firstIndex)
    print("Last Date  = ", lastIndex)
    print("Total Days = ", records)

    if df.isnull().values.any() == True:
        print("WARNING: there are {} NaN in the data".format(df.isnull().values.sum()))
        print(df.isnull().values)
        
    return df


# =======================================================================
# Setup Porgram Variables
# =======================================================================
symList = ['SPY', 'TLT', 'GLD']

startDate = '01/01/2000'

refreshData = False

lookback = 50


dfDict = {}

for sym in symList:
    
    dfDict[sym] = gatherData(sym, startDate)
    
# =======================================================================
# Daily Analysis for Trendiness
# - For this program, a trend is closes above or below a simple moving
#   average.  Trend length is defined as a streak, concecutive closes
#   either above or below.
# =======================================================================
#import pdb; pdb.set_trace()

for sym in symList:
    
    df = dfDict[sym]
    df.rename(columns={'Adj Close': 'AdjClose'}, inplace=True)
    
    df.loc[:,'SMA'] = df.AdjClose.rolling(lookback).mean()
    
    df.loc[:,'Above'] = df.AdjClose > df.SMA
    df.loc[:,'Below'] = df.AdjClose < df.SMA
    
    #df.loc[:,'AbvMeta'] = df.Above.groupby(df.Above.diff().cumsum()).cumcount()
    x = df.Above - df.Above.shift()
    x = x.astype(bool)
    x = x.cumsum()
    df.loc[:,'AbvMeta']  = df.Above.groupby(x).cumcount()
    
    df.loc[:,'NextAbove'] = df.Above.shift(-1)
    
    df.loc[:,'AbvStreak'] = df.apply(lambda x: x.AbvMeta if ( 
          (x.Above == True) & (x.NextAbove == False)) else 0, axis=1)


    x = df.Below - df.Below.shift()
    x = x.astype(bool)
    x = x.cumsum()
    df.loc[:,'BlwMeta']  = df.Below.groupby(x).cumcount()
    
    df.loc[:,'NextBelow'] = df.Below.shift(-1)
    
    df.loc[:,'BlwStreak'] = df.apply(lambda x: -x.AbvMeta if ( 
          (x.Below == True) & (x.NextBelow == False)) else 0, axis=1)
    
    #import pdb; pdb.set_trace()
    df.loc[:,'Streak'] = df.apply(lambda x: x.AbvStreak if x.AbvStreak != 0 
          else np.NaN, axis=1)
    
    df.loc[:,'Streak'] = df.apply(lambda x: x.BlwStreak if x.BlwStreak != 0 
          else x.Streak, axis=1)
    
    t='{}'.format(sym)
    df.Streak.plot(kind='hist',bins=25,title=t)
    plt.show()