import os
import warnings
warnings.filterwarnings('ignore')
from pylab import rcParams
rcParams['figure.figsize'] = 10, 6
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima.model import ARIMAResults
! pip install pmdarima
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_squared_log_error
import math
import numpy as np
! pip install nsepy
from nsepy import get_history
from datetime import date
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import requests
import datetime
from datetime import datetime, timedelta
from datetime import date
from dateutil.relativedelta import relativedelta
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from pathlib import Path
from warnings import simplefilter
from sklearn.linear_model import LinearRegression
from statsmodels.graphics.tsaplots import plot_pacf
import datetime
from datetime import datetime, timedelta
from datetime import date
from dateutil.relativedelta import relativedelta
Requirement already satisfied: pmdarima in c:\programdata\anaconda3\lib\site-packages (1.8.5) Requirement already satisfied: Cython!=0.29.18,>=0.29 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.29.23) Requirement already satisfied: statsmodels!=0.12.0,>=0.11 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.12.2) Requirement already satisfied: urllib3 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.26.4) Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (52.0.0.post20210125) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.0.1) Requirement already satisfied: numpy>=1.19.3 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.20.1) Requirement already satisfied: scikit-learn>=0.22 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (0.24.1) Requirement already satisfied: pandas>=0.19 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.2.4) Requirement already satisfied: scipy>=1.3.2 in c:\programdata\anaconda3\lib\site-packages (from pmdarima) (1.6.2) Requirement already satisfied: python-dateutil>=2.7.3 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2.8.1) Requirement already satisfied: pytz>=2017.3 in c:\programdata\anaconda3\lib\site-packages (from pandas>=0.19->pmdarima) (2021.1) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn>=0.22->pmdarima) (2.1.0) Requirement already satisfied: patsy>=0.5 in c:\programdata\anaconda3\lib\site-packages (from statsmodels!=0.12.0,>=0.11->pmdarima) (0.5.1) Requirement already satisfied: six in c:\programdata\anaconda3\lib\site-packages (from patsy>=0.5->statsmodels!=0.12.0,>=0.11->pmdarima) (1.15.0)
WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages)
Requirement already satisfied: nsepy in c:\programdata\anaconda3\lib\site-packages (0.8) Requirement already satisfied: six in c:\programdata\anaconda3\lib\site-packages (from nsepy) (1.15.0) Requirement already satisfied: pandas in c:\programdata\anaconda3\lib\site-packages (from nsepy) (1.2.4) Requirement already satisfied: requests in c:\programdata\anaconda3\lib\site-packages (from nsepy) (2.25.1) Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (from nsepy) (1.20.1) Requirement already satisfied: lxml in c:\programdata\anaconda3\lib\site-packages (from nsepy) (4.6.3) Requirement already satisfied: click in c:\programdata\anaconda3\lib\site-packages (from nsepy) (7.1.2) Requirement already satisfied: beautifulsoup4 in c:\programdata\anaconda3\lib\site-packages (from nsepy) (4.9.3) Requirement already satisfied: soupsieve>1.2 in c:\programdata\anaconda3\lib\site-packages (from beautifulsoup4->nsepy) (2.2.1) Requirement already satisfied: pytz>=2017.3 in c:\programdata\anaconda3\lib\site-packages (from pandas->nsepy) (2021.1) Requirement already satisfied: python-dateutil>=2.7.3 in c:\programdata\anaconda3\lib\site-packages (from pandas->nsepy) (2.8.1) Requirement already satisfied: certifi>=2017.4.17 in c:\programdata\anaconda3\lib\site-packages (from requests->nsepy) (2020.12.5) Requirement already satisfied: chardet<5,>=3.0.2 in c:\programdata\anaconda3\lib\site-packages (from requests->nsepy) (4.0.0) Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\programdata\anaconda3\lib\site-packages (from requests->nsepy) (1.26.4) Requirement already satisfied: idna<3,>=2.5 in c:\programdata\anaconda3\lib\site-packages (from requests->nsepy) (2.10)
WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -portlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution -mportlib-metadata (c:\programdata\anaconda3\lib\site-packages) WARNING: Ignoring invalid distribution - (c:\programdata\anaconda3\lib\site-packages)
def plot_periodogram(ts, detrend='linear', ax=None):
    """Draw the periodogram of ``ts`` on a log-scaled frequency axis.

    The sampling frequency is fixed at 365.2425 samples per year, so the
    x-axis reads in cycles per year (assumes ``ts`` holds one observation per
    calendar day -- TODO confirm for trading-day data).

    Parameters
    ----------
    ts : array-like
        Series of observations passed to ``scipy.signal.periodogram``.
    detrend : str, default 'linear'
        Detrending mode forwarded to ``scipy.signal.periodogram``.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure/axes pair is created when omitted.

    Returns
    -------
    The axes that were drawn on.
    """
    from scipy.signal import periodogram

    # `pd.Timedelta("1Y") / pd.Timedelta("1D")` evaluated to 365.2425, but the
    # ambiguous "Y" unit is rejected by pandas >= 2.0, so the value is spelled
    # out directly.
    fs = 365.2425
    frequencies, spectrum = periodogram(
        ts,
        fs=fs,
        detrend=detrend,
        window="boxcar",
        scaling='spectrum',
    )
    if ax is None:
        _, ax = plt.subplots()
    ax.step(frequencies, spectrum, color="purple")
    ax.set_xscale("log")
    ax.set_xticks([1, 2, 4, 6, 12, 26, 52, 104])
    ax.set_xticklabels(
        [
            "Annual (1)",
            "Semiannual (2)",
            "Quarterly (4)",
            "Bimonthly (6)",
            "Monthly (12)",
            "Biweekly (26)",
            "Weekly (52)",
            "Semiweekly (104)",
        ],
        rotation=30,
    )
    ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
    ax.set_ylabel("Variance")
    ax.set_title("Periodogram")
    return ax
#plot_periodogram(sbin.Close)
def lagplot(x, y=None, lag=1, standardize=False, ax=None, **kwargs):
    """Scatter a series against a lagged copy of ``x`` with a LOWESS fit.

    ``x`` is shifted by ``lag`` steps and plotted on the horizontal axis; the
    vertical axis is ``y`` when given, otherwise ``x`` itself. With
    ``standardize=True`` the lagged ``x`` (and ``y``, if supplied) are
    z-scored first. The correlation between the two plotted series is shown
    in the upper-left corner. Extra keyword arguments go to ``sns.regplot``.

    Returns the axes that were drawn on.
    """
    from matplotlib.offsetbox import AnchoredText

    lagged = x.shift(lag)
    if standardize:
        lagged = (lagged - lagged.mean()) / lagged.std()

    if y is None:
        # No separate target: compare the series with its own past.
        target = x
    elif standardize:
        target = (y - y.mean()) / y.std()
    else:
        target = y

    correlation = target.corr(lagged)

    if ax is None:
        _, ax = plt.subplots()

    ax = sns.regplot(
        x=lagged,
        y=target,
        scatter_kws={"alpha": 0.75, "s": 3},
        line_kws={"color": 'C3'},
        lowess=True,
        ax=ax,
        **kwargs,
    )

    # Correlation badge in the corner of the panel.
    badge = AnchoredText(
        f"{correlation:.2f}",
        prop={"size": "large"},
        frameon=True,
        loc="upper left",
    )
    badge.patch.set_boxstyle("square, pad=0.0")
    ax.add_artist(badge)

    ax.set(title=f"Lag {lag}", xlabel=lagged.name, ylabel=target.name)
    return ax
def plot_lags(x, y=None, lags=6, nrows=1, lagplot_kwargs=None, **kwargs):
    """Draw a grid of lag plots for lags ``1..lags``.

    Parameters
    ----------
    x : pandas Series
        Series whose lagged copies are plotted on the horizontal axes.
    y : pandas Series, optional
        Target series; ``x`` itself is used when omitted.
    lags : int, default 6
        Number of lag panels to draw.
    nrows : int, default 1
        Number of grid rows; the column count is derived from ``lags``.
    lagplot_kwargs : dict, optional
        Extra keyword arguments for each ``lagplot`` call. Points default to
        blue; a ``color`` entry here overrides that default.
    **kwargs
        Forwarded to ``plt.subplots`` (``nrows``, ``ncols`` and ``figsize``
        are filled in when absent).

    Returns
    -------
    The matplotlib figure holding the grid.
    """
    import math

    # Merge instead of passing color='blue' alongside **lagplot_kwargs, which
    # raised TypeError whenever the caller supplied their own color. Using
    # None as the default also avoids sharing one dict across calls.
    panel_kwargs = {'color': 'blue', **(lagplot_kwargs or {})}

    kwargs.setdefault('nrows', nrows)
    kwargs.setdefault('ncols', math.ceil(lags / nrows))
    kwargs.setdefault('figsize', (kwargs['ncols'] * 2, nrows * 2 + 0.5))
    fig, axs = plt.subplots(sharex=True, sharey=True, squeeze=False, **kwargs)
    for lag, ax in enumerate(fig.get_axes(), start=1):
        if lag <= lags:
            ax = lagplot(x, y, lag=lag, ax=ax, **panel_kwargs)
            ax.set_title(f"Lag {lag}", fontdict=dict(fontsize=14))
            ax.set(xlabel="", ylabel="")
        else:
            # Grid cells beyond the requested number of lags stay blank.
            ax.axis('off')
    plt.setp(axs[-1, :], xlabel=x.name)
    plt.setp(axs[:, 0], ylabel=y.name if y is not None else x.name)
    fig.tight_layout(w_pad=0.1, h_pad=0.1)
    return fig
def make_lags(ts, lags):
    """Return ``ts`` shifted by 1..``lags`` steps, side by side.

    Each shifted copy is keyed ``y_lag_<k>``; the copies are concatenated
    column-wise, so the leading rows of each lag hold missing values.
    """
    lag_steps = range(1, lags + 1)
    shifted = [ts.shift(step) for step in lag_steps]
    labels = [f'y_lag_{step}' for step in lag_steps]
    return pd.concat(shifted, axis=1, keys=labels)
# Global plotting defaults for the notebook.
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# the old names were later removed, so prefer the new name when it exists and
# fall back to the original one on older installations.
whitegrid_style = "seaborn-whitegrid"
if "seaborn-v0_8-whitegrid" in plt.style.available:
    whitegrid_style = "seaborn-v0_8-whitegrid"
plt.style.use(whitegrid_style)
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
# Keyword sets for `Series.plot`: grey dotted line, purple solid line and
# blue dotted line respectively, all with dark markers and no legend.
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
plot_params2 = dict(
    color="#BF3EFF",
    style="-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
plot_params3 = dict(
    color="#3A5AFF",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'
import json
import requests
# --- Download daily price history from the TD Ameritrade API ---------------
# NOTE(review): the tokens/ids below are hard-coded placeholders; real values
# should not be committed with the notebook.

#We need this reference token to request an authorization token
reftok = 'Icantreallysharethissinceyoucouldgetaccesstomyaccount'
#If we want to get a new authorization token, we need to send a request to this url
newtoken = 'https://api.tdameritrade.com/v1/oauth2/token'
#These are the parameters that need to be provided when the request is made
pays = { 'grant_type': 'refresh_token', 'refresh_token': reftok,
        'client_id': 'Icantshowthiseithersinceyoucouldgetaccesstomyaccount',
        'redirect_uri': 'https://localhost'}
heads2 = { 'Content-Type': 'application/x-www-form-urlencoded' } #Headers to describe the request being made
#Posting our request to the page and retrieving that authorization code
authpage = requests.post(url=newtoken, headers=heads2, data=pays)
# Fail with the HTTP error itself rather than a KeyError on 'access_token'.
authpage.raise_for_status()
auth = authpage.json()
newauth = auth['access_token']

#Now let's grab the stock data
# NOTE(review): the symbol is 'GOOGL' while the frame is called `msftstock`
# and the rest of the notebook analyses MSFT.csv -- confirm the intended symbol.
name = 'GOOGL' #Name of stock
end = '1655127000000' #Unix time stamp of the stock market open on June 13th, 2022
#The request for data, with the ID of the bot created through the TD Ameritrade API
#Includes period requested, stock name, frequency of data, end and start date
payload = {'client_id': 'Icantshowthiseithersinceyoucouldgetaccesstomyaccount4SG@AMER.OAUTHAP',
           'periodType':'year','period':'10',
           'frequencyType':'daily','frequency' : '1','endDate' : end}
#Authorization for the request being made
# (the original mixed an f-string with .format(): f'Bearer {}' is a SyntaxError)
heads = {'Authorization': f'Bearer {newauth}'}
#Pulling the response from the page after the request is posted
page = requests.get(url=f'https://api.tdameritrade.com/v1/marketdata/{name}/pricehistory',
                    headers=heads, params=payload)
page.raise_for_status()
content = page.json()

#The content that is spit out is in a dictionary form
#Each candle carries its timestamp in Unix milliseconds; convert it to a
#datetime (whole seconds) so the data can be indexed by time
for candle in content['candles']:
    candle["datetime"] = datetime.fromtimestamp(int(candle['datetime']) // 1000)

#Pulling apart the dictionary into a pandas dataframe
msftstock = pd.DataFrame(content["candles"])
# Move the last column (the timestamp) to the front.
cols = msftstock.columns.tolist()
cols = cols[-1:] + cols[:-1]
msftstock = msftstock[cols]
# The column already holds datetime objects, so no parse format is needed.
msftstock.datetime = pd.to_datetime(msftstock.datetime)
msftstock = msftstock.rename(columns = {'datetime':'Date','open':'Open','high':'High',
                                        'low':'Low','close':'Close','volume':'Volume'})
from sklearn.model_selection import train_test_split
# Load the saved daily candles from disk, indexed by the parsed Date column.
msftstock = pd.read_csv("MSFT.csv", index_col='Date', parse_dates=['Date'])

# The closing price is the series modelled throughout the notebook.
MSFTy = msftstock['Close']

# Overview plot of the full closing-price history.
plt.figure(figsize=(12, 6))
plt.plot(MSFTy, label="True Value", color='blue', alpha=0.5)
plt.title('MSFT Stock Close')
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
# One figure per entry: Open vs Close, Low vs High, then Volume on its own.
# For the two price pairs the tuple is (second-drawn column, first-drawn
# column); for Volume the second item is the line colour.
pair_styles = {
    'Close': (('Stock Open', 'gold'), ('Stock Close', 'blue')),
    'High': (('Low Point', 'red'), ('High Point', '#439E60')),
}
for primary, secondary in (('Close', 'Open'), ('High', 'Low'), ('Volume', 'purple')):
    plt.figure(figsize=(12, 6))
    plt.xlabel('Date')
    plt.ylabel('Amount')
    if primary == 'Volume':
        plt.title(f'MSFT Stock {primary}')
        plt.plot(msftstock[primary], color=secondary, alpha=1)
        continue
    (under_label, under_color), (over_label, over_color) = pair_styles[primary]
    plt.title(f'MSFT Stock {secondary} and {primary}')
    plt.plot(msftstock[secondary], label=under_label, color=under_color, alpha=1)
    plt.plot(msftstock[primary], color=over_color, alpha=1, label=over_label)
    plt.legend([under_label, over_label])
msftstock
Open | High | Low | Close | Volume | |
---|---|---|---|---|---|
Date | |||||
2002-06-13 05:00:00 | 27.420 | 27.7400 | 27.100 | 27.110 | 79771600 |
2002-06-14 05:00:00 | 26.575 | 27.7750 | 26.500 | 27.625 | 109433600 |
2002-06-17 05:00:00 | 27.830 | 28.2200 | 27.685 | 27.840 | 94486600 |
2002-06-18 05:00:00 | 27.765 | 28.1450 | 27.675 | 27.995 | 77518400 |
2002-06-19 05:00:00 | 27.735 | 27.9700 | 27.160 | 27.180 | 81247000 |
... | ... | ... | ... | ... | ... |
2022-06-07 05:00:00 | 266.635 | 273.1300 | 265.940 | 272.500 | 22860677 |
2022-06-08 05:00:00 | 271.710 | 273.0000 | 269.610 | 270.410 | 17372341 |
2022-06-09 05:00:00 | 267.780 | 272.7081 | 264.630 | 264.790 | 26439728 |
2022-06-10 05:00:00 | 260.580 | 260.5800 | 252.530 | 252.990 | 31445841 |
2022-06-13 05:00:00 | 245.110 | 249.0242 | 241.530 | 242.260 | 46135788 |
5036 rows × 5 columns
# Partial autocorrelation of the raw closing prices, out to 100 lags.
plot_pacf(
    MSFTy,
    lags=100,
    color='blue',
    vlines_kwargs=dict(colors='blue'),
    alpha=0.05,
)
plt.show()

# Autocorrelation of the first-differenced closing prices.
first_difference = MSFTy.diff().dropna()
plot_acf(
    first_difference,
    color='blue',
    vlines_kwargs=dict(colors='blue'),
    alpha=0.05,
)
plt.show()

# Lag scatter plots for lags 1-6, then the periodogram of the closing prices.
plot_lags(MSFTy, lags=6)
plt.show()
plot_periodogram(MSFTy)
<AxesSubplot:title={'center':'Periodogram'}, ylabel='Variance'>
# Rolling 14-observation mean and standard deviation of the closing price.
# (A train/test split used to be performed here, but it referenced `msft`
# before that frame is created; the same split is made further down, right
# after `msft` is built, and nothing in between uses its results.)
rcParams['figure.figsize'] = 10, 6
df_diff = MSFTy
moving_avg = df_diff.rolling(14).mean()
std_dev = df_diff.rolling(14).std()
plt.title('Moving Average')
plt.plot(std_dev, color="black", label="Standard Deviation")
plt.plot(moving_avg, color="red", label="Mean")
# Build the legend only once the lines exist; calling plt.legend() before
# plotting only produced a "No handles with labels found" warning.
plt.legend(['Standard Deviation', 'Mean'])
plt.show()
#moving_avg
No handles with labels found to put in legend.
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive decomposition of the closing price with a 20-observation period.
result = seasonal_decompose(MSFTy, model='additive', period=20)

# Pass the size to plt.subplots directly: a separate plt.figure(figsize=...)
# call only created an extra, empty figure and left the panels at the
# default size.
fig, axes = plt.subplots(4, 1, sharex=True, figsize=(15, 10))
result.observed.plot(ax=axes[0], legend=False, color='#D53BFF')
axes[0].set_ylabel('Closing Price')
result.trend.plot(ax=axes[1], legend=False, color='#8F5004')
axes[1].set_ylabel('Trend')
result.seasonal.plot(ax=axes[2], legend=False, color='grey')
axes[2].set_ylabel('Seasonal')
result.resid.plot(ax=axes[3], legend=False, color='blue')
axes[3].set_ylabel('Residuals')
plt.show()
#fig.set_size_inches(15, 10)
<Figure size 1080x720 with 0 Axes>
# Previous-row values of every column, used as regression features.
msftlag = make_lags(msftstock, 1)
# The first row has no predecessor; its missing values are replaced with 0.
msft = msftlag.fillna(value=0)
msft
y_lag_1 | |||||
---|---|---|---|---|---|
Open | High | Low | Close | Volume | |
Date | |||||
2002-06-13 05:00:00 | 0.000 | 0.0000 | 0.000 | 0.000 | 0.0 |
2002-06-14 05:00:00 | 27.420 | 27.7400 | 27.100 | 27.110 | 79771600.0 |
2002-06-17 05:00:00 | 26.575 | 27.7750 | 26.500 | 27.625 | 109433600.0 |
2002-06-18 05:00:00 | 27.830 | 28.2200 | 27.685 | 27.840 | 94486600.0 |
2002-06-19 05:00:00 | 27.765 | 28.1450 | 27.675 | 27.995 | 77518400.0 |
... | ... | ... | ... | ... | ... |
2022-06-07 05:00:00 | 272.060 | 274.1800 | 267.220 | 268.750 | 22400342.0 |
2022-06-08 05:00:00 | 266.635 | 273.1300 | 265.940 | 272.500 | 22860677.0 |
2022-06-09 05:00:00 | 271.710 | 273.0000 | 269.610 | 270.410 | 17372341.0 |
2022-06-10 05:00:00 | 267.780 | 272.7081 | 264.630 | 264.790 | 26439728.0 |
2022-06-13 05:00:00 | 260.580 | 260.5800 | 252.530 | 252.990 | 31445841.0 |
5036 rows × 5 columns
# Hold out the final 20 rows (chronological split, no shuffling).
X_train, X_test, y_train, y_test = train_test_split(msft, MSFTy, test_size=20, shuffle=False)
from sklearn.linear_model import LinearRegression

# 7-observation moving average of the training closes (kept for inspection).
movavgtrain = y_train.rolling(7).mean()
matrain = movavgtrain.loc[movavgtrain.index[6:]]

# Integer time steps: 0..n_train-1 for training, continuing for the test rows.
time = list(range(len(y_train.index)))
time2 = list(range(len(y_train.index), len(y_train.index) + len(y_test.index)))

# Fit a linear trend (non-negative slope) of closing price against time step.
detrend = pd.DataFrame(list(zip(time, y_train.values.tolist())), index=y_train.index, columns=['Time', 'Vals'])
TrendMod = LinearRegression(positive=True)
TrendMod.fit(detrend.loc[:, ['Time']], detrend.loc[:, 'Vals'])
y_pred = pd.Series(TrendMod.predict(detrend.loc[:, ['Time']]), index=detrend.index)
y_resid = y_train - y_pred

# The model was fitted on a column named 'Time'; predicting from a frame whose
# column is spelled 'time' is rejected by scikit-learn releases that validate
# feature names, so the same name is used here.
test_time = pd.DataFrame({'Time': time2})
y_vresid = y_test - pd.Series(TrendMod.predict(test_time), index=y_test.index)

plt.figure(figsize=(12, 6))
plt.title('MSFT')
plt.xlabel('Date')
plt.ylabel('Price')
plt.plot(y_train, label="Closing Price", color='blue', alpha=0.5)
plt.plot(y_pred, label='Trend', color='#951DFF')
plt.legend(['Closing Price', 'Trend'])
plt.show()

# Partial autocorrelation of the detrended training residuals.
plot_pacf(y_resid, lags=100, color='blue', vlines_kwargs={"colors": 'blue'}, alpha=0.01)
plt.show()
# Fit and predict
# Regress the closing price on the previous row's columns.
# The model is fitted without an intercept term (fit_intercept=False).
model = LinearRegression(fit_intercept=False)
model.fit(X_train, y_train)

# In-sample fit and out-of-sample forecast, re-indexed by date.
y_pred = pd.Series(model.predict(X_train), index=X_train.index)
y_fore = pd.Series(model.predict(X_test), index=X_test.index)

rmse_train = mean_squared_error(y_train, y_pred) ** 0.5
rmse_valid = mean_squared_error(y_test, y_fore) ** 0.5

txt = ''
print('')
print(f'{txt:50} Validation RMSE: {rmse_valid:.5f}')

# Plot rows 4700-5015 of the actual and fitted series, then the hold-out
# forecast and the true hold-out values.
plt.figure(figsize=(12, 5), dpi=100)
actual_window = MSFTy.index[4700:5016]
fitted_window = y_pred.index[4700:5016]
ax = MSFTy[actual_window].plot(**plot_params)
ax = y_pred[fitted_window].plot(ax=ax, color='#BF3EFF')
_ = y_fore.plot(ax=ax, color='yellow')
_ = y_test.plot(ax=ax, color='green')
plt.title("Microsoft Stock Prediction")
plt.ylabel('Price')
plt.legend([
    "True Close (Training)",
    "Predicted Close",
    'Forecasted Close Using Previous Close',
    'True Close (Validation)',
])
plt.show()
Validation RMSE: 6.16916
# Stepwise order search on the detrended residuals.
autoARIMA = auto_arima(
    y_resid,
    # non-seasonal orders: start at (0, d, 0), cap p/d/q at 3,
    # and let the ADF test pick d
    start_p=0,
    start_q=0,
    test='adf',
    max_p=3,
    max_d=3,
    max_q=3,
    d=None,
    # seasonal-order bounds
    start_P=0,
    start_Q=0,
    D=1,
    max_P=2,
    max_Q=2,
    max_D=2,
    # search behaviour
    trace=True,
    max_order=10,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
print(autoARIMA.summary())
Performing stepwise search to minimize aic ARIMA(0,1,0)(0,0,0)[0] intercept : AIC=19934.611, Time=0.14 sec ARIMA(1,1,0)(0,0,0)[0] intercept : AIC=19762.343, Time=0.39 sec ARIMA(0,1,1)(0,0,0)[0] intercept : AIC=19774.619, Time=0.51 sec ARIMA(0,1,0)(0,0,0)[0] : AIC=19932.740, Time=0.12 sec ARIMA(2,1,0)(0,0,0)[0] intercept : AIC=19762.298, Time=0.54 sec ARIMA(3,1,0)(0,0,0)[0] intercept : AIC=19763.642, Time=0.65 sec ARIMA(2,1,1)(0,0,0)[0] intercept : AIC=19763.903, Time=2.35 sec ARIMA(1,1,1)(0,0,0)[0] intercept : AIC=19762.544, Time=1.00 sec ARIMA(3,1,1)(0,0,0)[0] intercept : AIC=19765.665, Time=0.99 sec ARIMA(2,1,0)(0,0,0)[0] : AIC=19760.471, Time=0.26 sec ARIMA(1,1,0)(0,0,0)[0] : AIC=19760.523, Time=0.16 sec ARIMA(3,1,0)(0,0,0)[0] : AIC=19761.811, Time=0.27 sec ARIMA(2,1,1)(0,0,0)[0] : AIC=19762.073, Time=0.87 sec ARIMA(1,1,1)(0,0,0)[0] : AIC=19760.719, Time=0.35 sec ARIMA(3,1,1)(0,0,0)[0] : AIC=19763.834, Time=0.31 sec Best model: ARIMA(2,1,0)(0,0,0)[0] Total fit time: 8.992 seconds SARIMAX Results ============================================================================== Dep. Variable: y No. 
Observations: 5016 Model: SARIMAX(2, 1, 0) Log Likelihood -9877.236 Date: Fri, 02 Sep 2022 AIC 19760.471 Time: 14:48:50 BIC 19780.032 Sample: 0 HQIC 19767.326 - 5016 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 -0.1812 0.004 -41.203 0.000 -0.190 -0.173 ar.L2 0.0203 0.004 4.544 0.000 0.012 0.029 sigma2 3.0077 0.018 171.375 0.000 2.973 3.042 =================================================================================== Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 108267.33 Prob(Q): 0.98 Prob(JB): 0.00 Heteroskedasticity (H): 35.49 Skew: -0.55 Prob(H) (two-sided): 0.00 Kurtosis: 25.74 =================================================================================== Warnings: [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Residual diagnostics for the model selected by the order search.
autoARIMA.plot_diagnostics()
plt.show()

# Fit ARIMA(2, 1, 0) on the detrended residuals with statsmodels.
ARUHere = ARIMA(y_resid, order=(2, 1, 0))
fitted = ARUHere.fit()
print(fitted.summary())
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. warnings.warn('A date index has been provided, but it has no' C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. warnings.warn('A date index has been provided, but it has no' C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\base\tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. warnings.warn('A date index has been provided, but it has no'
SARIMAX Results ============================================================================== Dep. Variable: y No. Observations: 5016 Model: ARIMA(2, 1, 0) Log Likelihood -9877.236 Date: Fri, 02 Sep 2022 AIC 19760.471 Time: 16:26:13 BIC 19780.032 Sample: 0 HQIC 19767.326 - 5016 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 -0.1812 0.004 -41.203 0.000 -0.190 -0.173 ar.L2 0.0203 0.004 4.544 0.000 0.012 0.029 sigma2 3.0077 0.018 171.375 0.000 2.973 3.042 =================================================================================== Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 108267.33 Prob(Q): 0.98 Prob(JB): 0.00 Heteroskedasticity (H): 35.49 Skew: -0.55 Prob(H) (two-sided): 0.00 Kurtosis: 25.74 =================================================================================== Warnings: [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Forecast the detrended residuals over the hold-out window and add the
# linear trend back to obtain price forecasts.
horizon = len(y_test)
# `return_conf_int` / `alpha` are pmdarima-style keywords and are not
# parameters of statsmodels' `forecast`, which returns point forecasts only.
# Intervals are available via `fitted.get_forecast(horizon).conf_int(alpha=0.05)`.
fc = fitted.forecast(horizon)
# Use the column name the trend model was fitted with ('Time'); a differently
# named column is rejected by scikit-learn releases that validate feature names.
trend_fc = pd.Series(TrendMod.predict(pd.DataFrame({'Time': time2})), index=y_test.index)
fc_series = pd.Series(fc.tolist(), index=y_test.index) + trend_fc

rmse_valid = mean_squared_error(y_test, fc_series) ** 0.5
print('')
txt = ''
print(f'{txt:50} Validation RMSE: {rmse_valid:.5f}')

plt.figure(figsize=(12, 5), dpi=100)
plt.plot(y_train[y_train.index[4700:5016]], color='#A17EFF', label='Training')
plt.plot(y_test, color='blue', label='Actual Stock Price')
plt.plot(fc_series, color='orange', label='Predicted Stock Price Over 20 Days')
plt.title('Microsoft Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Actual Stock Price')
plt.legend(loc='upper left', fontsize=8)
plt.show()
Validation RMSE: 9.08409
import statsmodels.api as sm
import warnings
# Seasonal ARIMA on the detrended residuals: (2, 1, 0) non-seasonal order
# plus a (2, 1, 0) seasonal component with a period of 24 observations.
sarima = sm.tsa.statespace.SARIMAX(
    y_resid,
    order=(2, 1, 0),
    seasonal_order=(2, 1, 0, 24),
)
sarm = sarima.fit()
print(sarm.summary())
SARIMAX Results ========================================================================================== Dep. Variable: y No. Observations: 5016 Model: SARIMAX(2, 1, 0)x(2, 1, 0, 24) Log Likelihood -10518.605 Date: Sat, 03 Sep 2022 AIC 21047.210 Time: 15:50:45 BIC 21079.787 Sample: 0 HQIC 21058.629 - 5016 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 -0.1720 0.004 -40.291 0.000 -0.180 -0.164 ar.L2 0.0413 0.005 8.532 0.000 0.032 0.051 ar.S.L24 -0.6696 0.005 -123.338 0.000 -0.680 -0.659 ar.S.L48 -0.3507 0.006 -60.259 0.000 -0.362 -0.339 sigma2 3.9532 0.023 168.941 0.000 3.907 3.999 =================================================================================== Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 95087.39 Prob(Q): 0.97 Prob(JB): 0.00 Heteroskedasticity (H): 35.76 Skew: -0.58 Prob(H) (two-sided): 0.00 Kurtosis: 24.35 =================================================================================== Warnings: [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Forecast the detrended residuals with the seasonal model over the hold-out
# window and add the linear trend back to obtain price forecasts.
horizon = len(y_test)
# `return_conf_int` / `alpha` are pmdarima-style keywords and are not
# parameters of statsmodels' `forecast`, which returns point forecasts only.
# Intervals are available via `sarm.get_forecast(horizon).conf_int(alpha=0.05)`.
fc = sarm.forecast(horizon)
# Use the column name the trend model was fitted with ('Time'); a differently
# named column is rejected by scikit-learn releases that validate feature names.
trend_fc = pd.Series(TrendMod.predict(pd.DataFrame({'Time': time2})), index=y_test.index)
fc_series = pd.Series(fc.tolist(), index=y_test.index) + trend_fc

rmse_valid = mean_squared_error(y_test, fc_series) ** 0.5
print('')
txt = ''
print(f'{txt:50} Validation RMSE: {rmse_valid:.5f}')

plt.figure(figsize=(12, 5), dpi=100)
plt.plot(y_train[y_train.index[4700:5016]], color='#A17EFF', label='Training')
plt.plot(y_test, color='blue', label='Actual Stock Price')
plt.plot(fc_series, color='orange', label='Predicted Stock Price Over 20 Days')
plt.title('Microsoft Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Actual Stock Price')
plt.legend(loc='upper left', fontsize=8)
plt.show()
Validation RMSE: 8.05865
X_train.columns
MultiIndex([('y_lag_1', 'Close'), ('y_lag_1', 'Volume'), ('y_lag_1', 'Open'), ('y_lag_1', 'High'), ('y_lag_1', 'Low')], )
# Residuals of the complete closing-price history after removing the trend.
# The frame uses the column name the trend model was fitted with ('Time').
full_time = pd.DataFrame({'Time': list(range(len(MSFTy)))})
yres_full = MSFTy - pd.Series(TrendMod.predict(full_time), index=MSFTy.index)

# Calendar days in the 30-day window ending 2022-07-13, minus the dates
# listed in `weekhol`, leave the dates that label the forecast.
base = datetime.strptime('07/13/2022', '%m/%d/%Y')
weekhol = [datetime(2022, 7, 10, 0, 0), datetime(2022, 7, 9, 0, 0),
           datetime(2022, 7, 4, 0, 0), datetime(2022, 7, 3, 0, 0), datetime(2022, 7, 2, 0, 0),
           datetime(2022, 6, 26, 0, 0), datetime(2022, 6, 25, 0, 0), datetime(2022, 6, 20, 0, 0),
           datetime(2022, 6, 19, 0, 0), datetime(2022, 6, 18, 0, 0)]
excluded_days = set(weekhol)
# Sorted oldest-first: forecast step k must be labelled with the k-th future
# date. Counting back from `base` without sorting put the first forecast step
# on the last date.
timeind = sorted(
    day
    for day in (base - timedelta(days=offset) for offset in range(30))
    if day not in excluded_days
)
timeind
[datetime.datetime(2022, 7, 13, 0, 0), datetime.datetime(2022, 7, 12, 0, 0), datetime.datetime(2022, 7, 11, 0, 0), datetime.datetime(2022, 7, 8, 0, 0), datetime.datetime(2022, 7, 7, 0, 0), datetime.datetime(2022, 7, 6, 0, 0), datetime.datetime(2022, 7, 5, 0, 0), datetime.datetime(2022, 7, 1, 0, 0), datetime.datetime(2022, 6, 30, 0, 0), datetime.datetime(2022, 6, 29, 0, 0), datetime.datetime(2022, 6, 28, 0, 0), datetime.datetime(2022, 6, 27, 0, 0), datetime.datetime(2022, 6, 24, 0, 0), datetime.datetime(2022, 6, 23, 0, 0), datetime.datetime(2022, 6, 22, 0, 0), datetime.datetime(2022, 6, 21, 0, 0), datetime.datetime(2022, 6, 17, 0, 0), datetime.datetime(2022, 6, 16, 0, 0), datetime.datetime(2022, 6, 15, 0, 0), datetime.datetime(2022, 6, 14, 0, 0)]
# Refit the seasonal model on the residuals of the complete history.
FinMod = sm.tsa.statespace.SARIMAX(
    yres_full,
    order=(2, 1, 0),
    seasonal_order=(2, 1, 0, 24),
)
FM = FinMod.fit()
print(FM.summary())
SARIMAX Results ========================================================================================== Dep. Variable: y No. Observations: 5036 Model: SARIMAX(2, 1, 0)x(2, 1, 0, 24) Log Likelihood -10639.312 Date: Sat, 03 Sep 2022 AIC 21288.625 Time: 15:49:45 BIC 21321.222 Sample: 0 HQIC 21300.048 - 5036 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 -0.1622 0.004 -37.501 0.000 -0.171 -0.154 ar.L2 0.0427 0.005 8.817 0.000 0.033 0.052 ar.S.L24 -0.6698 0.005 -124.638 0.000 -0.680 -0.659 ar.S.L48 -0.3443 0.006 -59.954 0.000 -0.356 -0.333 sigma2 4.0793 0.025 165.977 0.000 4.031 4.127 =================================================================================== Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 87864.69 Prob(Q): 0.98 Prob(JB): 0.00 Heteroskedasticity (H): 37.01 Skew: -0.61 Prob(H) (two-sided): 0.00 Kurtosis: 23.48 =================================================================================== Warnings: [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Forecast beyond the end of the data: residual forecast plus extrapolated trend.
horizon = len(timeind)
# `return_conf_int` / `alpha` are pmdarima-style keywords and are not
# parameters of statsmodels' `forecast`, which returns point forecasts only.
# Intervals are available via `FM.get_forecast(horizon).conf_int(alpha=0.05)`.
fc = FM.forecast(horizon)
# Time steps continue from the end of the history (previously hard-coded as
# 5036..5055). The column is named 'Time' to match the frame the trend model
# was fitted on.
future_time = pd.DataFrame({'Time': list(range(len(MSFTy), len(MSFTy) + horizon))})
trend_fc = pd.Series(TrendMod.predict(future_time), index=timeind)
fc_series = pd.Series(fc.tolist(), index=timeind) + trend_fc

plt.figure(figsize=(12, 5), dpi=100)
plt.plot(MSFTy[MSFTy.index[4700:len(MSFTy)]], color='#A17EFF', label='Training')
plt.plot(fc_series, color='orange', label='Predicted Stock Price Over 20 Days')
plt.title('Microsoft Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Actual Stock Price')
plt.legend(loc='upper left', fontsize=8)
plt.show()
fc_series
2022-07-13 82.544858 2022-07-12 73.916053 2022-07-11 70.791733 2022-07-08 67.419968 2022-07-07 62.652684 2022-07-06 67.681334 2022-07-05 64.246383 2022-07-01 59.461628 2022-06-30 58.207507 2022-06-29 66.084148 2022-06-28 68.543887 2022-06-27 67.942625 2022-06-24 68.607264 2022-06-23 72.733913 2022-06-22 70.512375 2022-06-21 73.315283 2022-06-17 77.664643 2022-06-16 72.034219 2022-06-15 76.256502 2022-06-14 78.202710 dtype: float64
# Load the prices used to check the forecast (read from msftcheck.csv) and
# display the Close column.
# NOTE(review): the row order of this file is not established here -- confirm
# it matches the order of the forecast before comparing the two.
check = pd.read_csv('msftcheck.csv')
check.Close
0 252.72 1 253.67 2 264.51 3 267.66 4 268.40 5 266.21 6 262.85 7 259.58 8 256.83 9 260.26 10 256.48 11 264.89 12 267.70 13 258.86 14 253.13 15 253.74 16 247.65 17 244.97 18 251.76 19 244.49 Name: Close, dtype: float64
# Root-mean-squared error between the checked closes and the forecast.
# NOTE(review): the two series are compared row by row, not by date -- confirm
# that `check` and `fc_series` are in the same chronological order.
squared_error = mean_squared_error(check.Close, fc_series)
rmse_check = squared_error ** 0.5
print('')
txt = ''
print(f'{txt:50} RMSE: {rmse_check:.5f}')
RMSE: 27.71746