Exercise 9-1: Analyze time-series data¶
In [ ]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [ ]:
stockData = pd.read_pickle('/content/stocks.pkl')
stockData.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 253 entries, 0 to 252 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 253 non-null datetime64[ns] 1 Open 253 non-null float64 2 High 253 non-null float64 3 Low 253 non-null float64 4 Close 253 non-null float64 dtypes: datetime64[ns](1), float64(4) memory usage: 10.0 KB
In [ ]:
# display the first five rows
stockData.head(25)
Out[ ]:
Date | Open | High | Low | Close | |
---|---|---|---|---|---|
0 | 2020-01-02 | 74.059998 | 75.150002 | 73.797501 | 75.087502 |
1 | 2020-01-03 | 74.287498 | 75.144997 | 74.125000 | 74.357498 |
2 | 2020-01-06 | 73.447502 | 74.989998 | 73.187500 | 74.949997 |
3 | 2020-01-07 | 74.959999 | 75.224998 | 74.370003 | 74.597504 |
4 | 2020-01-08 | 74.290001 | 76.110001 | 74.290001 | 75.797501 |
5 | 2020-01-09 | 76.809998 | 77.607498 | 76.550003 | 77.407501 |
6 | 2020-01-10 | 77.650002 | 78.167503 | 77.062500 | 77.582497 |
7 | 2020-01-13 | 77.910004 | 79.267502 | 77.787498 | 79.239998 |
8 | 2020-01-14 | 79.175003 | 79.392502 | 78.042503 | 78.169998 |
9 | 2020-01-15 | 77.962502 | 78.875000 | 77.387497 | 77.834999 |
10 | 2020-01-16 | 78.397499 | 78.925003 | 78.022499 | 78.809998 |
11 | 2020-01-17 | 79.067497 | 79.684998 | 78.750000 | 79.682503 |
12 | 2020-01-21 | 79.297501 | 79.754997 | 79.000000 | 79.142502 |
13 | 2020-01-22 | 79.644997 | 79.997498 | 79.327499 | 79.425003 |
14 | 2020-01-23 | 79.480003 | 79.889999 | 78.912498 | 79.807503 |
15 | 2020-01-24 | 80.062500 | 80.832497 | 79.379997 | 79.577499 |
16 | 2020-01-27 | 77.514999 | 77.942497 | 76.220001 | 77.237503 |
17 | 2020-01-28 | 78.150002 | 79.599998 | 78.047501 | 79.422501 |
18 | 2020-01-29 | 81.112503 | 81.962502 | 80.345001 | 81.084999 |
19 | 2020-01-30 | 80.135002 | 81.022499 | 79.687500 | 80.967499 |
20 | 2020-01-31 | 80.232498 | 80.669998 | 77.072502 | 77.377502 |
21 | 2020-02-03 | 76.074997 | 78.372498 | 75.555000 | 77.165001 |
22 | 2020-02-04 | 78.827499 | 79.910004 | 78.407501 | 79.712502 |
23 | 2020-02-05 | 80.879997 | 81.190002 | 79.737503 | 80.362503 |
24 | 2020-02-06 | 80.642502 | 81.305000 | 80.065002 | 81.302498 |
Generate date ranges¶
In [ ]:
# generate a daterange for every other day in the year 2020
every_other_day = pd.date_range(start='2020-01-01', end='2020-12-31', freq='2D')
In [ ]:
# generate a daterange for every 3 hours in 2020
every_three_hours = pd.date_range(start='2020-01-01', end='2020-12-31', freq='3H')
In [ ]:
# generate a date range for every other Friday in 2020
every_other_friday = pd.date_range(start='2020-01-03', end='2020-12-25', freq='2W-FRI')
print(every_other_day)
print(every_three_hours)
print(every_other_friday)
DatetimeIndex(['2020-01-01', '2020-01-03', '2020-01-05', '2020-01-07', '2020-01-09', '2020-01-11', '2020-01-13', '2020-01-15', '2020-01-17', '2020-01-19', ... '2020-12-12', '2020-12-14', '2020-12-16', '2020-12-18', '2020-12-20', '2020-12-22', '2020-12-24', '2020-12-26', '2020-12-28', '2020-12-30'], dtype='datetime64[ns]', length=183, freq='2D') DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 03:00:00', '2020-01-01 06:00:00', '2020-01-01 09:00:00', '2020-01-01 12:00:00', '2020-01-01 15:00:00', '2020-01-01 18:00:00', '2020-01-01 21:00:00', '2020-01-02 00:00:00', '2020-01-02 03:00:00', ... '2020-12-29 21:00:00', '2020-12-30 00:00:00', '2020-12-30 03:00:00', '2020-12-30 06:00:00', '2020-12-30 09:00:00', '2020-12-30 12:00:00', '2020-12-30 15:00:00', '2020-12-30 18:00:00', '2020-12-30 21:00:00', '2020-12-31 00:00:00'], dtype='datetime64[ns]', length=2921, freq='3H') DatetimeIndex(['2020-01-03', '2020-01-17', '2020-01-31', '2020-02-14', '2020-02-28', '2020-03-13', '2020-03-27', '2020-04-10', '2020-04-24', '2020-05-08', '2020-05-22', '2020-06-05', '2020-06-19', '2020-07-03', '2020-07-17', '2020-07-31', '2020-08-14', '2020-08-28', '2020-09-11', '2020-09-25', '2020-10-09', '2020-10-23', '2020-11-06', '2020-11-20', '2020-12-04', '2020-12-18'], dtype='datetime64[ns]', freq='2W-FRI')
Reindex the data¶
In [ ]:
# set an index on the date column for the stock data
stockData.set_index('Date', inplace=True)
In [ ]:
# reindex the data so the data contains only Fridays
# and assign the result to a variable called stockDataFridays
stockDataFridays = stockData.loc[stockData.index.dayofweek == 4]
print(stockDataFridays.head())
Open High Low Close Date 2020-01-03 74.287498 75.144997 74.125000 74.357498 2020-01-10 77.650002 78.167503 77.062500 77.582497 2020-01-17 79.067497 79.684998 78.750000 79.682503 2020-01-24 80.062500 80.832497 79.379997 79.577499 2020-01-31 80.232498 80.669998 77.072502 77.377502
In [ ]:
# use Pandas to plot the Close column of the reindexed data
plt.plot(stockDataFridays['Close'])
plt.title('Close Price on Fridays')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.show()
Resample the data¶
In [ ]:
# downsample the data to a monthly frequency
stockDataDown = stockData.resample('M').mean()
In [ ]:
# use Pandas to plot the Close column of the resampled data
plt.plot(stockDataDown['Close'])
plt.title('Monthly Mean Close Prices')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.show()
Compute a rolling window¶
In [ ]:
# compute a 2 week rolling average for the Close column
# set the min_periods to 1 and assign the data to a variable called stocksRolling
stocksRolling = stockData['Close'].rolling(window='14D', min_periods=1).mean()
In [ ]:
# use Pandas to plot the Close column of the rolling data
plt.plot(stocksRolling)
plt.title('2Week Rolling Average for Close Prices')
plt.xlabel('Date')
plt.ylabel('Close Price')
plt.show()