Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#dummy transform
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
class DummyTransformer(BaseTransformer):
    """Placeholder transformer that records the shape of the data seen in ``fit``.

    Parameters
    ----------
    check_input : bool, optional (default=True)
        Stored on the instance. The code visible here does not act on it.

    Attributes
    ----------
    num_cases : int or None
        First element of ``X.shape`` from the most recent ``fit`` call.
    num_dimensions : int or None
        Second element of ``X.shape`` from the most recent ``fit`` call.
    """

    def __init__(self, check_input=True):
        self.check_input = check_input
        self.num_cases = None
        self.num_dimensions = None

    def fit(self, X, y=None):
        """Record the shape of ``X``.

        Parameters
        ----------
        X : object exposing a two-element ``shape``
            Training data.
        y : ignored

        Returns
        -------
        self
            Returned so that calls can be chained, e.g. ``fit(X).transform(X)``.
        """
        # Unpacking fails with ValueError unless X.shape has exactly two entries.
        self.num_cases, self.num_dimensions = X.shape
        return self

    def transform(self, X, y=None):
        """Check that ``X`` is a pandas DataFrame.

        Raises
        ------
        TypeError
            If ``X`` is not a ``pd.DataFrame``.
        """
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input should be a pandas dataframe containing Series objects")
        # NOTE(review): no transformation logic is visible after the type
        # check, so this method currently returns None - confirm against the
        # full source file.
import numpy as np
import pandas as pd
import math
import sys
from sktime.transformers.dictionary_based.SAX import BitWord
from sktime.transformers.base import BaseTransformer
# TO DO: Finish comments
class SFA(BaseTransformer):
__author__ = "Matthew Middlehurst"
""" SFA Transformer, as described in
@inproceedings{schafer2012sfa,
title={SFA: a symbolic fourier approximation and index for similarity search in high dimensional datasets},
author={Sch{\"a}fer, Patrick and H{\"o}gqvist, Mikael},
booktitle={Proceedings of the 15th International Conference on Extending Database Technology},
pages={516--527},
year={2012},
organization={ACM}
}
Overview: for each series:
run a sliding window across the series
for each window
shorten the series with DFT
# find out by how much we have to shift seasonal_components to align with new index
shift = -time_index[0] % self.sp
# align seasonal components with new starting point of new time_index
return np.roll(self.seasonal_components_, shift=shift, axis=1)
def _fit_seasonal_decomposition_model(self, X):
    """Fit seasonal decomposition model and return fitted seasonal components.

    ``X`` is transposed before being handed to statsmodels' ``seasonal_decompose``
    because that function expects time series in columns rather than rows; the
    seasonal part of the result is transposed back and returned as an array with
    at least two dimensions.
    """
    decomposition = seasonal_decompose(
        X.T,
        model=self.model,
        freq=self.sp,
        filt=None,
        two_sided=True,
        extrapolate_trend=0,
    )
    return np.atleast_2d(decomposition.seasonal.T)
class Detrender(BaseTransformer):
    """A transformer that removes trend of given polynomial order from time series/panel data

    Parameters
    ----------
    order : int, optional (default=0)
        Polynomial order, zero: mean, one: linear, two: quadratic, etc

    Raises
    ------
    ValueError
        If ``order`` is not a non-negative integer.
    """

    def __init__(self, order=0):
        # Zero is an accepted order (mean removal), so the requirement is
        # "non-negative", not "positive". The message reports the offending
        # value as well as its type so that e.g. ``order=-1`` is diagnosable.
        if not (isinstance(order, int) and order >= 0):
            raise ValueError(
                f"order must be a non-negative integer, but found: {order!r} "
                f"of type {type(order).__name__}"
            )
        self.order = order
import numpy as np
import pandas as pd
from sklearn.utils.validation import check_is_fitted
from statsmodels.tsa.seasonal import seasonal_decompose
from sktime.transformers.base import BaseTransformer
from sktime.transformers.compose import Tabulariser
from sktime.utils.data_container import get_time_index
from sktime.utils.time_series import fit_trend, remove_trend, add_trend
from sktime.utils.validation.forecasting import validate_sp, check_is_fitted_in_transform
from sktime.utils.validation.supervised import validate_X, check_X_is_univariate
class Deseasonaliser(BaseTransformer):
"""A transformer that removes a seasonal component from time series/panel data
Parameters
----------
sp : int, optional (default=1)
Seasonal periodicity
model : str {'additive', 'multiplicative'}, optional (default='additive')
Model to use for estimating seasonal component
check_input : bool, optional (default=True)
When set to ``True``, inputs will be validated, otherwise inputs are assumed to be valid
and no checks are performed. Use with caution.
"""
def __init__(self, sp=1, model='additive', check_input=True):
self.sp = validate_sp(sp)
allowed_models = ('additive', 'multiplicative')
rows_t.append(row_t) # append transformed rows
cols_t.append(rows_t) # append transformed columns
# if series-to-series transform, flatten transformed series
Xt = concat_nested_arrays(cols_t) # concatenate transformed columns
# tabularise/unnest series-to-primitive transforms
xt = Xt.iloc[0, 0]
if isinstance(xt, (pd.Series, np.ndarray)) and len(xt) == 1:
Xt = tabularize(Xt)
return Xt
class Tabularizer(BaseTransformer):
    """
    A transformer that turns time series/panel data into tabular data.

    This estimator converts nested pandas dataframe containing time-series/panel data with numpy arrays or pandas Series in
    dataframe cells into a tabular pandas dataframe with only primitives in cells. This is useful for transforming
    time-series/panel data into a format that is accepted by standard validation learning algorithms (as in sklearn).

    Parameters
    ----------
    check_input: bool, optional (default=True)
        When set to ``True``, inputs will be validated, otherwise inputs are assumed to be valid
        and no checks are performed. Use with caution.
    """
    # TODO: allow to keep column names, but unclear how to handle multivariate data
# filter out single points
starts = starts[lengths >= self.min_length]
lengths = lengths[lengths >= self.min_length]
self._starts.append(starts)
self._lengths.append(lengths)
# put into dataframe
Xt = pd.DataFrame()
column_prefix = "%s_%s" % (column_name, "nan" if np.isnan(self.value) else str(self.value))
Xt["%s_starts" % column_prefix] = pd.Series(self._starts)
Xt["%s_lengths" % column_prefix] = pd.Series(self._lengths)
return Xt
class DerivativeSlopeTransformer(BaseTransformer):
# TODO add docstrings
def transform(self, X, y=None):
    """Build a dataframe with one derivative column per column of ``X``.

    Each column of ``X`` is selected by position, passed to
    ``DerivativeSlopeTransformer.row_wise_get_der`` and the result is stored
    as a ``pd.Series`` under the name ``der_dim_<position>``.
    """
    # Only the column count is needed; unpacking still requires a 2-D shape.
    _, n_dims = X.shape
    derivatives = pd.DataFrame()
    for dim_idx in range(n_dims):
        column = X.iloc[:, dim_idx]
        derivatives['der_dim_' + str(dim_idx)] = pd.Series(
            DerivativeSlopeTransformer.row_wise_get_der(column)
        )
    return derivatives
@staticmethod
def row_wise_get_der(X):
def get_der(x):
der = []
# dummy transform
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
from enum import Enum
class DiscreteFourierType(Enum):
    """Selector for the kind of discrete Fourier transform to apply."""

    STANDARD = 1
    REAL = 2
    HERMITIAN = 3
class DiscreteFourierTransformer(BaseTransformer):
    """Transformer that applies a discrete Fourier transform to a dataframe.

    Parameters
    ----------
    fourier_type : DiscreteFourierType, optional (default=DiscreteFourierType.STANDARD)
        Which transform to apply.
    axis : int, sequence of int or None, optional (default=None)
        Axis or axes over which to compute the transform.
    norm : optional (default=None)
        Passed through as ``norm`` to the numpy FFT routine.
    check_input : bool, optional (default=True)
        Stored on the instance.

    Raises
    ------
    TypeError
        If ``fourier_type`` is not a ``DiscreteFourierType`` member.
    """

    def __init__(self, fourier_type=DiscreteFourierType.STANDARD, axis=None, norm=None, check_input=True):
        # Validate the argument itself: ``self.type`` does not exist until it
        # is assigned below, so checking it first raised AttributeError on
        # every construction.
        if not isinstance(fourier_type, DiscreteFourierType):
            raise TypeError("type should be defined as a DiscreteFourierTransform type")
        self.check_input = check_input
        self.type = fourier_type
        self.norm = norm
        self.axis = axis

    def transform(self, X, y=None):
        """Apply the configured Fourier transform to ``X``.

        Raises
        ------
        TypeError
            If ``X`` is not a ``pd.DataFrame``.
        """
        if not isinstance(X, pd.DataFrame):
            raise TypeError("Input should be a pandas dataframe containing Series objects")
        # Compare against the enum member; an Enum member never equals a bare
        # int, so ``self.type == 1`` could not match.
        if self.type == DiscreteFourierType.STANDARD:
            # ``np.fft.fftn`` takes ``axes`` (a sequence), not ``axis``; wrap a
            # scalar axis so a single int keeps working.
            axes = self.axis
            if axes is not None and np.ndim(axes) == 0:
                axes = (axes,)
            return np.fft.fftn(X, axes=axes, norm=self.norm)
        # NOTE(review): handling for the REAL and HERMITIAN types is not
        # visible in this excerpt; those types currently fall through and
        # return None - confirm against the full source file.
def __init__(self, fs=1.0, window='boxcar', nfft=None, detrend='constant', return_onesided=True, scaling='density',
             axis=-1, check_input=True):
    """Store the transform keyword arguments and mark the instance as fitted.

    All arguments except ``check_input`` are collected, under their own
    names, into the ``transform_parameters`` dictionary.
    NOTE(review): the argument names mirror ``scipy.signal.periodogram`` -
    presumably they are forwarded to it; confirm against the enclosing class.
    """
    self.transform_parameters = dict(
        fs=fs,
        window=window,
        nfft=nfft,
        detrend=detrend,
        return_onesided=return_onesided,
        scaling=scaling,
        axis=axis,
    )
    self.type_ = FunctionConfigs.FuncType.POWER_SPECTRUM
    # The data is stored under the key 'x'.
    self.input_key_ = 'x'
    self.check_input_ = check_input
    # Set unconditionally here, in the constructor.
    self.is_fitted_ = True
def get_transform_params(self):
    """Return the ``transform_parameters`` dictionary stored on the instance.

    The same dictionary object is returned, not a copy.
    """
    stored_params = self.transform_parameters
    return stored_params
class CosineTransformer(BaseTransformer):
    """Transformer that returns the element-wise cosine of its input."""

    def __init__(self):
        """Create the transformer; there is nothing to configure."""

    def transform(self, x, y=None):
        """Return ``np.cos(x)``; ``y`` is accepted but not used."""
        cosine = np.cos(x)
        return cosine
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
from enum import Enum
__all__ = ['Resizer']
__author__ = ["Tony Bagnall"]
class ResizeType(Enum):
    """Resizing strategies selectable on ``Resizer``."""

    PADDER = 1
    TRUNCATOR = 2
    RESIZER = 3
class Resizer(BaseTransformer):
    """
    Transformer to standardise the size of the data set in one of three ways.

    1. type == PADDER: Pads all series to the length of the longest series
    2. type == TRUNCATOR: Shortens every series to be the same size as the smallest
    3. type == RESIZER: Resizes so each series goes from start to end (inclusive). It will pad if necessary, and data at
       start will be at position 0 in the new series. If this type is set transform will throw an exception if start
       and end are not set.

    In the case where there is an empty series, TRUNCATOR will make everything empty.

    Parameters
    ----------
    type : ResizeType, optional (default=ResizeType.PADDER)
        Resizing strategy.
    start : optional (default=None)
        Start position, stored on the instance.
    end : optional (default=None)
        End position, stored on the instance.
    pad_value : optional (default=0)
        Padding value, stored on the instance.
    """

    def __init__(self, type=ResizeType.PADDER, start=None, end=None, pad_value=0):
        # ``type`` shadows the builtin, but it is part of the public
        # signature, so the name is kept for backward compatibility.
        self.type = type
        self.start = start
        self.end = end
        # Store the caller's value; it was previously hard-coded to 0, which
        # silently discarded any custom pad_value.
        self.pad_value = pad_value
# No need for fit for Padding transformer
import numpy as np
import pandas as pd
from sktime.transformers.base import BaseTransformer
from sktime.utils.load_data import load_from_tsfile_to_dataframe as load_ts
class PAA(BaseTransformer):
    """(PAA) Piecewise Aggregate Approximation Transformer, as described in
    Eamonn Keogh, Kaushik Chakrabarti, Michael Pazzani, and Sharad Mehrotra.
    Dimensionality reduction for fast similarity search in large time series databases.
    Knowledge and information Systems, 3(3), 263-286, 2001.

    For each series reduce the dimensionality to num_intervals, where each value is the mean of values in
    the interval.

    TO DO: pythonise it to make it more efficient. Maybe check vs this version
    http://vigne.sh/posts/piecewise-aggregate-approx/
    Could have: Tune the interval size in fit somehow?

    Parameters
    ----------
    num_intervals : int, dimension of the transformed data (default 8)
    """

    # Kept after the docstring: a string literal only becomes the class
    # docstring (``PAA.__doc__``) when it is the first statement in the body.
    __author__ = "Matthew Middlehurst"