Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
n = x.shape[0]
# check on status of null
null = self.null
# fit a model on an arange to determine the residuals
if null == 'trend':
t = np.arange(n).reshape(n, 1)
# these numbers came out of the R code.. I've found 0 doc for these
table = c(0.216, 0.176, 0.146, 0.119)
elif null == 'level':
t = np.ones(n).reshape(n, 1)
# these numbers came out of the R code.. I've found 0 doc for these
table = c(0.739, 0.574, 0.463, 0.347)
else:
raise ValueError("null must be one of %r" % self._valid)
# fit the model
lm = LinearRegression().fit(t, x)
e = x - lm.predict(t) # residuals
s = np.cumsum(e)
eta = (s * s).sum() / (n**2)
s2 = (e * e).sum() / n
# scalar, denom = 10, 14
# if self.lshort:
# scalar, denom = 3, 13
# l_ = int(np.trunc(scalar * np.sqrt(n) / denom))
if self.lshort:
Notes
-----
This test is generally not used directly, but in conjunction with
:func:`pmdarima.arima.nsdiffs`, which directly estimates the number
of seasonal differences.
References
----------
.. [1] Testing for seasonal stability using the Canova
and Hansen test statistic: http://bit.ly/2wKkrZo
.. [2] R source code for CH test:
https://github.com/robjhyndman/forecast/blob/master/R/arima.R#L148
"""
# Hard-coded lookup of 11 increasing values, built with the project's ``c``
# helper.
# NOTE(review): presumably the critical values of the test, copied from the R
# source cited in the docstring - confirm against that source before editing.
crit_vals = c(0.4617146, 0.7479655, 1.0007818,
1.2375350, 1.4625240, 1.6920200,
1.9043096, 2.1169602, 2.3268562,
2.5406922, 2.7391007)
def __init__(self, m):
    """Initialize the test by forwarding ``m`` to the parent class.

    Parameters
    ----------
    m : int
        Passed through unchanged as the ``m`` keyword of the parent
        constructor. NOTE(review): presumably the seasonal periodicity -
        confirm against the parent class.
    """
    # This class keeps no constructor state of its own; the parent
    # initializer does all of the work.
    parent = super(CHTest, self)
    parent.__init__(m=m)
@staticmethod
def _sd_test(wts, s):
# assume no NaN values since called internally
# also assume s > 1 since called internally
n = wts.shape[0]
# no use checking, because this is an internal method
# if n <= s: raise ValueError('too few samples (%i<=%i)' % (n, s))
frec = np.ones(int((s + 1) / 2), dtype=np.int)
if not self._base_case(x):
return np.nan, False
# ensure vector
x = check_endog(x, dtype=DTYPE)
n = x.shape[0]
# check on status of null
null = self.null
# fit a model on an arange to determine the residuals
if null == 'trend':
t = np.arange(n).reshape(n, 1)
# these numbers came out of the R code.. I've found 0 doc for these
table = c(0.216, 0.176, 0.146, 0.119)
elif null == 'level':
t = np.ones(n).reshape(n, 1)
# these numbers came out of the R code.. I've found 0 doc for these
table = c(0.739, 0.574, 0.463, 0.347)
else:
raise ValueError("null must be one of %r" % self._valid)
# fit the model
lm = LinearRegression().fit(t, x)
e = x - lm.predict(t) # residuals
s = np.cumsum(e)
eta = (s * s).sum() / (n**2)
s2 = (e * e).sum() / n
References
----------
.. [1] https://wikipedia.org/wiki/Augmented_Dickey–Fuller_test
.. [2] R's tseries ADF source code: https://bit.ly/2EnvM5V
"""
# Hard-coded lookup table: 6 rows of 8 values each, i.e. shape (6, 8).
# NOTE(review): the rows appear to correspond to the 6 sample sizes in
# ``tableT`` and the columns to the 8 probabilities in ``tablep`` - confirm
# against the R tseries source cited in the docstring.
table = np.array([
(-4.38, -3.95, -3.60, -3.24, -1.14, -0.80, -0.50, -0.15),
(-4.15, -3.80, -3.50, -3.18, -1.19, -0.87, -0.58, -0.24),
(-4.04, -3.73, -3.45, -3.15, -1.22, -0.90, -0.62, -0.28),
(-3.99, -3.69, -3.43, -3.13, -1.23, -0.92, -0.64, -0.31),
(-3.98, -3.68, -3.42, -3.13, -1.24, -0.93, -0.65, -0.32),
(-3.96, -3.66, -3.41, -3.12, -1.25, -0.94, -0.66, -0.33)
])
# Number of columns in ``table`` (8 for the literal above).
tablen = table.shape[1]
# Six values, one per row of ``table``.
tableT = c(25, 50, 100, 250, 500, 100000)
# Eight values, one per column of ``table``.
tablep = c(0.01, 0.025, 0.05, 0.10, 0.90, 0.95, 0.975, 0.99)
def __init__(self, alpha=0.05, k=None):
    """Store ``k`` and reject negative values.

    Parameters
    ----------
    alpha : float, optional (default=0.05)
        Forwarded unchanged to the parent constructor.

    k : int or None, optional (default=None)
        Stored on the instance as ``self.k``. ``None`` skips the range
        check. NOTE(review): presumably the lag order used by the test -
        confirm against the rest of the class.

    Raises
    ------
    ValueError
        If ``k`` is not None and is less than zero.
    """
    super(ADFTest, self).__init__(alpha=alpha)

    # The attribute is assigned before validation, so ``self.k`` is
    # already set at the moment the error is raised.
    self.k = k
    is_negative = k is not None and k < 0
    if is_negative:
        raise ValueError('k must be a positive integer (>= 0)')
@staticmethod
def _ols(x, y, z, k):
n = y.shape[0]
yt = z[:, 0] # type: np.ndarray
tt = np.arange(k - 1, n)
# R does [k:n].. but that's 1-based indexing and inclusive on the tail
yleft : float, optional (default=None)
The value to be returned when input ``x`` values are less than
``min(x)``. The default is defined by the value of rule given below.
yright : float, optional (default=None)
The value to be returned when input ``x`` values are greater than
``max(x)``. The default is defined by the value of rule given below.
ties : str, optional (default='mean')
Handling of tied ``x`` values. Choices are "mean" or "ordered".
"""
if method not in VALID_APPROX:
raise ValueError('method must be one of %r' % VALID_APPROX)
# make sure xout is an array
xout = c(xout).astype(np.float64) # ensure double
# check method
method_key = method
# not a callable, actually, but serves the purpose..
method = get_callable(method_key, VALID_APPROX)
# copy/regularize vectors
x, y = _regularize(x, y, ties)
nx = x.shape[0]
# if len 1? (we've already handled where the size is 0, since we check that
# in the _regularize function when we call c1d)
if nx == 1:
if method_key == 'linear':
raise ValueError('need at least two points to '
lshort : bool, optional (default=True)
Whether or not to truncate the ``l`` value in the C code.
Notes
-----
This test is generally used indirectly via the
:func:`pmdarima.arima.ndiffs` function, which computes the
differencing term, ``d``.
References
----------
.. [1] R's tseries KPSS test source code: http://bit.ly/2eJP1IU
"""
# Accepted values for the ``null`` constructor argument. The test logic
# branches on 'trend' and 'level' (the constructor default is 'level') and
# interpolates this set into the ValueError raised for anything else, so the
# set must name exactly those two values. It previously listed 'null', which
# made the error message advertise a value that is itself rejected.
_valid = {'trend', 'level'}
# Four probabilities.
# NOTE(review): presumably paired positionally with the four-entry ``table``
# selected for the chosen ``null`` - confirm against the R tseries source.
tablep = c(0.01, 0.025, 0.05, 0.10)
def __init__(self, alpha=0.05, null='level', lshort=True):
    """Record the test configuration on the instance.

    Parameters
    ----------
    alpha : float, optional (default=0.05)
        Forwarded unchanged to the parent constructor.

    null : str, optional (default='level')
        Stored as ``self.null``. It is not validated here.

    lshort : bool, optional (default=True)
        Stored as ``self.lshort``. Whether or not to truncate the ``l``
        value in the C code.
    """
    super(KPSSTest, self).__init__(alpha=alpha)

    # Plain attribute storage; no checks are performed at construction.
    self.null, self.lshort = null, lshort
def should_diff(self, x):
"""Test whether the time series is stationary or needs differencing.
Parameters
----------
x : array-like, shape=(n_samples,)
The time series vector.
Returns
.. [1] R's tseries PP test source code: http://bit.ly/2wbzx6V
"""
# Hard-coded lookup table. The literal is written as 8 rows of 6 positive
# values; it is then negated and transposed, so ``table`` has shape (6, 8)
# and every entry is negative.
# NOTE(review): after the transpose the rows appear to correspond to the 6
# sample sizes in ``tableT`` and the columns to the 8 probabilities in
# ``tablep`` - confirm against the R tseries source cited in the docstring.
table = -np.array([
(22.5, 25.7, 27.4, 28.4, 28.9, 29.5),
(19.9, 22.4, 23.6, 24.4, 24.8, 25.1),
(17.9, 19.8, 20.7, 21.3, 21.5, 21.8),
(15.6, 16.8, 17.5, 18.0, 18.1, 18.3),
(3.66, 3.71, 3.74, 3.75, 3.76, 3.77),
(2.51, 2.60, 2.62, 2.64, 2.65, 2.66),
(1.53, 1.66, 1.73, 1.78, 1.78, 1.79),
(0.43, 0.65, 0.75, 0.82, 0.84, 0.87)
]).T
# Number of columns in the transposed ``table`` (8 for the literal above).
tablen = table.shape[1]
# Six values, cast to the module's ``DTYPE``.
tableT = c(25, 50, 100, 250, 500, 100000).astype(DTYPE)
# Eight values, one per column of the transposed ``table``.
tablep = c(0.01, 0.025, 0.05, 0.10, 0.90, 0.95, 0.975, 0.99)
def __init__(self, alpha=0.05, lshort=True):
    """Record the test configuration on the instance.

    Parameters
    ----------
    alpha : float, optional (default=0.05)
        Forwarded unchanged to the parent constructor.

    lshort : bool, optional (default=True)
        Stored as ``self.lshort``. It is not validated here.
    """
    parent = super(PPTest, self)
    parent.__init__(alpha=alpha)

    # Kept as given; it is only read later by the test itself.
    self.lshort = lshort
def should_diff(self, x):
"""Test whether the time series is stationary or needs differencing.
Parameters
----------
x : array-like, shape=(n_samples,)
The time series vector.
Returns
-------
----------
.. [1] R's tseries PP test source code: http://bit.ly/2wbzx6V
"""
# Hard-coded lookup table. The literal is written as 8 rows of 6 positive
# values; it is then negated and transposed, so ``table`` has shape (6, 8)
# and every entry is negative.
# NOTE(review): after the transpose the rows appear to correspond to the 6
# sample sizes in ``tableT`` and the columns to the 8 probabilities in
# ``tablep`` - confirm against the R tseries source cited in the docstring.
table = -np.array([
(22.5, 25.7, 27.4, 28.4, 28.9, 29.5),
(19.9, 22.4, 23.6, 24.4, 24.8, 25.1),
(17.9, 19.8, 20.7, 21.3, 21.5, 21.8),
(15.6, 16.8, 17.5, 18.0, 18.1, 18.3),
(3.66, 3.71, 3.74, 3.75, 3.76, 3.77),
(2.51, 2.60, 2.62, 2.64, 2.65, 2.66),
(1.53, 1.66, 1.73, 1.78, 1.78, 1.79),
(0.43, 0.65, 0.75, 0.82, 0.84, 0.87)
]).T
# Number of columns in the transposed ``table`` (8 for the literal above).
tablen = table.shape[1]
# Six values, cast to the module's ``DTYPE``.
tableT = c(25, 50, 100, 250, 500, 100000).astype(DTYPE)
# Eight values, one per column of the transposed ``table``.
tablep = c(0.01, 0.025, 0.05, 0.10, 0.90, 0.95, 0.975, 0.99)
def __init__(self, alpha=0.05, lshort=True):
    """Record the test configuration on the instance.

    Parameters
    ----------
    alpha : float, optional (default=0.05)
        Forwarded unchanged to the parent constructor.

    lshort : bool, optional (default=True)
        Stored as ``self.lshort``. It is not validated here.
    """
    parent = super(PPTest, self)
    parent.__init__(alpha=alpha)

    # Kept as given; it is only read later by the test itself.
    self.lshort = lshort
def should_diff(self, x):
"""Test whether the time series is stationary or needs differencing.
Parameters
----------
x : array-like, shape=(n_samples,)
The time series vector.
Returns