Exercise 9-1: Analyze time-series data¶

In [ ]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [ ]:
# Read the pickled stock-price table and print a summary of its columns and dtypes.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
stocks_pickle = '/content/stocks.pkl'
stockData = pd.read_pickle(stocks_pickle)
stockData.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253 entries, 0 to 252
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    253 non-null    datetime64[ns]
 1   Open    253 non-null    float64       
 2   High    253 non-null    float64       
 3   Low     253 non-null    float64       
 4   Close   253 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 10.0 KB
In [ ]:
# display the first 25 rows
stockData.head(25)
Out[ ]:
Date Open High Low Close
0 2020-01-02 74.059998 75.150002 73.797501 75.087502
1 2020-01-03 74.287498 75.144997 74.125000 74.357498
2 2020-01-06 73.447502 74.989998 73.187500 74.949997
3 2020-01-07 74.959999 75.224998 74.370003 74.597504
4 2020-01-08 74.290001 76.110001 74.290001 75.797501
5 2020-01-09 76.809998 77.607498 76.550003 77.407501
6 2020-01-10 77.650002 78.167503 77.062500 77.582497
7 2020-01-13 77.910004 79.267502 77.787498 79.239998
8 2020-01-14 79.175003 79.392502 78.042503 78.169998
9 2020-01-15 77.962502 78.875000 77.387497 77.834999
10 2020-01-16 78.397499 78.925003 78.022499 78.809998
11 2020-01-17 79.067497 79.684998 78.750000 79.682503
12 2020-01-21 79.297501 79.754997 79.000000 79.142502
13 2020-01-22 79.644997 79.997498 79.327499 79.425003
14 2020-01-23 79.480003 79.889999 78.912498 79.807503
15 2020-01-24 80.062500 80.832497 79.379997 79.577499
16 2020-01-27 77.514999 77.942497 76.220001 77.237503
17 2020-01-28 78.150002 79.599998 78.047501 79.422501
18 2020-01-29 81.112503 81.962502 80.345001 81.084999
19 2020-01-30 80.135002 81.022499 79.687500 80.967499
20 2020-01-31 80.232498 80.669998 77.072502 77.377502
21 2020-02-03 76.074997 78.372498 75.555000 77.165001
22 2020-02-04 78.827499 79.910004 78.407501 79.712502
23 2020-02-05 80.879997 81.190002 79.737503 80.362503
24 2020-02-06 80.642502 81.305000 80.065002 81.302498

Generate date ranges¶

In [ ]:
# Build a DatetimeIndex that steps through 2020 two days at a time,
# starting on January 1st and never going past December 31st.
two_day_start, two_day_stop = '2020-01-01', '2020-12-31'
every_other_day = pd.date_range(two_day_start, two_day_stop, freq='2D')
In [ ]:
# generate a daterange for every 3 hours in 2020
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated in newer pandas releases; both describe the same 3-hour step.
# The end bound is midnight at the start of 2020-12-31, so the last
# timestamp produced is 2020-12-31 00:00:00.
every_three_hours = pd.date_range(start='2020-01-01', end='2020-12-31', freq='3h')
In [ ]:
# Build a DatetimeIndex of alternating Fridays in 2020: the '2W-FRI' rule
# anchors on Fridays and advances two weeks per step.
first_friday, last_friday = '2020-01-03', '2020-12-25'
every_other_friday = pd.date_range(first_friday, last_friday, freq='2W-FRI')

# Show the three generated indexes, one after another, each starting on its own line.
print(every_other_day, every_three_hours, every_other_friday, sep='\n')
DatetimeIndex(['2020-01-01', '2020-01-03', '2020-01-05', '2020-01-07',
               '2020-01-09', '2020-01-11', '2020-01-13', '2020-01-15',
               '2020-01-17', '2020-01-19',
               ...
               '2020-12-12', '2020-12-14', '2020-12-16', '2020-12-18',
               '2020-12-20', '2020-12-22', '2020-12-24', '2020-12-26',
               '2020-12-28', '2020-12-30'],
              dtype='datetime64[ns]', length=183, freq='2D')
DatetimeIndex(['2020-01-01 00:00:00', '2020-01-01 03:00:00',
               '2020-01-01 06:00:00', '2020-01-01 09:00:00',
               '2020-01-01 12:00:00', '2020-01-01 15:00:00',
               '2020-01-01 18:00:00', '2020-01-01 21:00:00',
               '2020-01-02 00:00:00', '2020-01-02 03:00:00',
               ...
               '2020-12-29 21:00:00', '2020-12-30 00:00:00',
               '2020-12-30 03:00:00', '2020-12-30 06:00:00',
               '2020-12-30 09:00:00', '2020-12-30 12:00:00',
               '2020-12-30 15:00:00', '2020-12-30 18:00:00',
               '2020-12-30 21:00:00', '2020-12-31 00:00:00'],
              dtype='datetime64[ns]', length=2921, freq='3H')
DatetimeIndex(['2020-01-03', '2020-01-17', '2020-01-31', '2020-02-14',
               '2020-02-28', '2020-03-13', '2020-03-27', '2020-04-10',
               '2020-04-24', '2020-05-08', '2020-05-22', '2020-06-05',
               '2020-06-19', '2020-07-03', '2020-07-17', '2020-07-31',
               '2020-08-14', '2020-08-28', '2020-09-11', '2020-09-25',
               '2020-10-09', '2020-10-23', '2020-11-06', '2020-11-20',
               '2020-12-04', '2020-12-18'],
              dtype='datetime64[ns]', freq='2W-FRI')

Reindex the data¶

In [ ]:
# set an index on the date column for the stock data
# Guarded so this cell can be re-run safely: once 'Date' has become the index
# it is no longer a column, and an unguarded set_index('Date') would raise KeyError.
# The result is rebound to stockData instead of mutating the frame in place.
if 'Date' in stockData.columns:
    stockData = stockData.set_index('Date')
In [ ]:
# keep only the rows whose index date falls on a Friday (dayofweek == 4)
# and store them in a variable called stockDataFridays
friday_mask = stockData.index.dayofweek == 4
stockDataFridays = stockData.loc[friday_mask]
print(stockDataFridays.head())
                 Open       High        Low      Close
Date                                                  
2020-01-03  74.287498  75.144997  74.125000  74.357498
2020-01-10  77.650002  78.167503  77.062500  77.582497
2020-01-17  79.067497  79.684998  78.750000  79.682503
2020-01-24  80.062500  80.832497  79.379997  79.577499
2020-01-31  80.232498  80.669998  77.072502  77.377502
In [ ]:
# use Pandas to plot the Close column of the reindexed data
# Series.plot draws the line through pandas' plotting API and returns the
# matplotlib Axes, which is then used to label the figure.
ax = stockDataFridays['Close'].plot(title='Close Price on Fridays')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
plt.show()
No description has been provided for this image

Resample the data¶

In [ ]:
# group the rows into bins using the 'M' (monthly) rule,
# then average every column within each bin
monthly_bins = stockData.resample('M')
stockDataDown = monthly_bins.mean()
In [ ]:
# use Pandas to plot the Close column of the resampled data
# Series.plot draws the line through pandas' plotting API and returns the
# matplotlib Axes, which is then used to label the figure.
ax = stockDataDown['Close'].plot(title='Monthly Mean Close Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
plt.show()
No description has been provided for this image

Compute a rolling window¶

In [ ]:
# average the Close column over a 14-day ('14D') rolling window;
# min_periods=1 means a mean is produced as soon as the window holds one value.
# The resulting Series is stored in stocksRolling.
close_prices = stockData['Close']
stocksRolling = close_prices.rolling('14D', min_periods=1).mean()
In [ ]:
# use Pandas to plot the Close column of the rolling data
# stocksRolling is already the rolling mean of Close, so it is plotted directly.
# Series.plot returns the matplotlib Axes, which is then used to label the figure.
ax = stocksRolling.plot(title='2-Week Rolling Average for Close Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
plt.show()
No description has been provided for this image