Macy’s stock seasonality¶
Background¶
I noticed that in the last few years, Macy’s stock has been growing around the Thanksgiving season, which aligns with makes sense, since that’s where Americans buy their presents. I wanted to do a little more math than just eye-balling the chart. Below is an export from Jupyter Notebook that I used to analyze daily stock prices for Macy’s in the range from October to December from 2015 to 2023.
Imports & Constants¶
import pandas
import matplotlib.pyplot as plt
# Inclusive range for dicing
YEARS = (2015, 2023)
MONTHS = (10, 12)
Download S&P 500 stock prices history as a CSV and parse the date¶
# https://www.nasdaq.com/market-activity/index/spx/historical?page=102&rows_per_page=25&timeline=y10
spx = pandas.read_csv('60-SPX.csv')
spx = spx.reindex(index=spx.index[::-1])
spx = spx.reset_index(drop=True)
del spx['Open']
del spx['Close/Last']
spx['Year'] = spx['Date'].str.split('/', expand=True)[2].astype(int)
spx['Month'] = spx['Date'].str.split('/', expand=True)[0].astype(int)
spx['Day'] = spx['Date'].str.split('/', expand=True)[1].astype(int)
spx
| Date | High | Low | Year | Month | Day | |
|---|---|---|---|---|---|---|
| 0 | 08/26/2014 | 2005.04 | 1998.59 | 2014 | 8 | 26 |
| 1 | 08/27/2014 | 2002.14 | 1996.20 | 2014 | 8 | 27 |
| 2 | 08/28/2014 | 1998.55 | 1990.52 | 2014 | 8 | 28 |
| 3 | 08/29/2014 | 2003.38 | 1994.65 | 2014 | 8 | 29 |
| 4 | 09/01/2014 | 0.00 | 0.00 | 2014 | 9 | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 2523 | 08/19/2024 | 5608.30 | 5550.74 | 2024 | 8 | 19 |
| 2524 | 08/20/2024 | 5620.51 | 5585.50 | 2024 | 8 | 20 |
| 2525 | 08/21/2024 | 5632.68 | 5591.57 | 2024 | 8 | 21 |
| 2526 | 08/22/2024 | 5643.22 | 5560.95 | 2024 | 8 | 22 |
| 2527 | 08/23/2024 | 5641.82 | 5585.16 | 2024 | 8 | 23 |
2528 rows × 6 columns
Download the target stock (M) prices history and parse the date¶
# https://finance.yahoo.com/quote/M/history/
prices = pandas.read_csv('60-M.csv')
del prices['Open']
del prices['Close']
del prices['Adj Close']
del prices['Volume']
prices['Year'] = prices['Date'].str.split('-', expand=True)[0].astype(int)
prices['Month'] = prices['Date'].str.split('-', expand=True)[1].astype(int)
prices['Day'] = prices['Date'].str.split('-', expand=True)[2].astype(int)
Merge the two datasets using date¶
Discard any rows with incomplete data
df = pandas.merge(spx, prices, on=['Year', 'Month', 'Day'], how='inner', suffixes=('Spx', 'Stock'))
df = df[(YEARS[0] <= df.Year) & (df.Year <= YEARS[1]) & (MONTHS[0] <= df.Month) & (df.Month <= MONTHS[1])]
df['didx'] = [
d
for year in range(YEARS[0], YEARS[1] + 1)
for d in range((df.Year == year).sum())
]
df.describe()
| HighSpx | LowSpx | Year | Month | Day | HighStock | LowStock | didx | |
|---|---|---|---|---|---|---|---|---|
| count | 571.000000 | 571.000000 | 571.000000 | 571.000000 | 571.000000 | 571.000000 | 571.000000 | 571.000000 |
| mean | 3249.744991 | 3215.057180 | 2018.998249 | 10.984238 | 15.562172 | 25.290193 | 24.382172 | 31.224168 |
| std | 899.323476 | 888.901796 | 2.582328 | 0.821699 | 8.823070 | 11.689632 | 11.410891 | 18.331992 |
| min | 1927.210000 | 1893.700000 | 2015.000000 | 10.000000 | 1.000000 | 5.990000 | 5.570000 | 0.000000 |
| 25% | 2556.490000 | 2543.895000 | 2017.000000 | 10.000000 | 8.000000 | 15.915000 | 15.395000 | 15.000000 |
| 50% | 3098.200000 | 3083.260000 | 2019.000000 | 11.000000 | 15.000000 | 23.059999 | 22.040001 | 31.000000 |
| 75% | 3981.490000 | 3935.905000 | 2021.000000 | 12.000000 | 23.000000 | 35.070002 | 33.399999 | 47.000000 |
| max | 4808.930000 | 4780.980000 | 2023.000000 | 12.000000 | 31.000000 | 52.480000 | 51.209999 | 63.000000 |
Square up the data¶
We want to plot several years on the same chart, so let’s trim the excess. For all the covered years find the smallest last day index.
years = list(range(min(df.Year), max(df.Year) + 1))
max_didx = min(max(df[df.Year==year].didx) for year in years)
min_didx = max(min(df[df.Year==year].didx) for year in years)
didxs = list(range(min_didx, max_didx + 1))
Plot¶
The stock price deviations for the selected months range over the selected years.
The S&P index price for the same dates.
Stock price corrected to S&P index.
Note that we plot percentage changes of the stock from the first day of the season. On day one all prices are at 0%, and then for each year they deviate.
def convert_to_season(df, y, years, didxs):
base = dict(zip(df.Year[df.didx==0], y[df.didx==0]))
return pandas.DataFrame(
[
[
(y[(df.Year==year) & (df.didx==didx)].iloc[0] - base[year]) * 100 / base[year]
for year in years
]
for didx in didxs
],
columns=map(str, years)
)
stock_seasons = convert_to_season(df, df.LowStock, years, didxs)
spx_seasons = convert_to_season(df, df.LowSpx, years, didxs)
stock_seasons.plot()
spx_seasons.plot()
(stock_seasons - spx_seasons).plot();
Download the M prices history with 1-minute precision¶
# https://lime.co/market-data-source/#i5z6wk
prices = pandas.read_csv('60-M-1m.csv', delimiter=';')
for column in ('ticker', 'open', 'high', 'low', 'volume'):
del prices[column]
prices['Year'] = prices['date'].str.split('/', expand=True)[2].astype(int)
prices['Month'] = prices['date'].str.split('/', expand=True)[0].astype(int)
prices['Day'] = prices['date'].str.split('/', expand=True)[1].astype(int)
prices['Hour'] = prices['time'].str.split(':', expand=True)[0].astype(int)
prices['Minute'] = prices['time'].str.split(':', expand=True)[1].astype(int)
prices
| date | time | close | Year | Month | Day | Hour | Minute | |
|---|---|---|---|---|---|---|---|---|
| 0 | 7/1/2024 | 6:30:00 AM | 19.100 | 2024 | 7 | 1 | 6 | 30 |
| 1 | 7/1/2024 | 6:31:00 AM | 19.030 | 2024 | 7 | 1 | 6 | 31 |
| 2 | 7/1/2024 | 6:32:00 AM | 19.050 | 2024 | 7 | 1 | 6 | 32 |
| 3 | 7/1/2024 | 6:33:00 AM | 19.065 | 2024 | 7 | 1 | 6 | 33 |
| 4 | 7/1/2024 | 6:34:00 AM | 19.020 | 2024 | 7 | 1 | 6 | 34 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 31726 | 10/24/2024 | 12:55:00 PM | 15.430 | 2024 | 10 | 24 | 12 | 55 |
| 31727 | 10/24/2024 | 12:56:00 PM | 15.450 | 2024 | 10 | 24 | 12 | 56 |
| 31728 | 10/24/2024 | 12:57:00 PM | 15.445 | 2024 | 10 | 24 | 12 | 57 |
| 31729 | 10/24/2024 | 12:58:00 PM | 15.445 | 2024 | 10 | 24 | 12 | 58 |
| 31730 | 10/24/2024 | 12:59:00 PM | 15.410 | 2024 | 10 | 24 | 12 | 59 |
31731 rows × 8 columns
Hypothesis 1¶
When Macy’s stock spikes in the morning by at least 20c, it slides down in the evening by at least 10c.
def analyze_day(date: str, df: list[float]) -> tuple:
open_price = df[(df.Hour==6) & (df.Minute==30)].close.iloc[0]
morning_spike = max(df[df.Hour <= 6].close)
day_decline = min(df[(df.Hour >= 8) & (df.Hour <= 11)].close)
return open_price, morning_spike, day_decline, (morning_spike - open_price), (morning_spike - day_decline)
data = []
for date in set(prices['date']):
data.append([date, *analyze_day(date, prices[prices.date == date])])
# Spike-decline
sd = pandas.DataFrame(data, columns=('date', 'open_price', 'morning_spike', 'day_decline', 'spike', 'decline'))
len(sd[(sd.spike > 0.2)]), len(sd[(sd.spike > 0.2) & (sd.decline > 0.1)])
(14, 9)
sd[(sd.spike > 0.2) & (sd.decline > 0.1)][:5]
| date | open_price | morning_spike | day_decline | spike | decline | |
|---|---|---|---|---|---|---|
| 4 | 9/13/2024 | 14.760 | 15.160 | 14.8347 | 0.400 | 0.3253 |
| 7 | 8/6/2024 | 15.224 | 15.485 | 15.3800 | 0.261 | 0.1050 |
| 8 | 7/11/2024 | 19.130 | 19.470 | 19.2900 | 0.340 | 0.1800 |
| 17 | 7/5/2024 | 19.420 | 19.900 | 19.4800 | 0.480 | 0.4200 |
| 34 | 7/3/2024 | 17.940 | 18.180 | 17.9250 | 0.240 | 0.2550 |
import matplotlib.pyplot as plt
positive_days = sd[(sd.spike > 0.2) & (sd.decline > 0.1)]
negative_days = sd[(sd.spike > 0.2) & (sd.decline <= 0.1)]
def plot_samples(days, prices, count=5):
fig, axes = plt.subplots(nrows=count, ncols=1, figsize=(10, 10))
for i in range(count):
day = days.iloc[i]
date = day.date
prices[prices.date == date].close.plot(ax=axes[i])
axes[i].axhline(y=day.morning_spike, color='red', linestyle='--')
axes[i].axhline(y=day.day_decline, color='green', linestyle='--')
plot_samples(positive_days, prices)
plot_samples(negative_days, prices)
Hypothesis 2¶
If you sell at high point before 11 AM, you can buy back by the end of day at least 10c lower price.
def analyze_day(date: str, df: list[float]) -> tuple:
prices = list(df.close)
open_price = prices[0]
high_price = max(df[df.Hour <= 8].close)
close_price = prices[-1]
open_delta = high_price - open_price
close_delta = high_price - close_price
day_delta = close_price - open_price
return [high_price - close_price, close_price - open_price]
data = []
for date in set(prices['date']):
data.append([date, *analyze_day(date, prices[prices.date == date])])
chart = pandas.DataFrame(data, columns=('date', 'play', 'nothing'))
chart.plot()
len(chart[chart.play - 0.2 > chart.nothing]) / len(chart)
0.524390243902439