def predict(self, X):
    """A reference implementation of a prediction function.

    Parameters
    ----------
    X : array-like, pandas DataFrame or Series, shape (n_samples, ...)
        The input samples.

    Returns
    -------
    y : ndarray, shape (n_samples,)
        Returns the dummy predictions.
    """
    X = check_ts_array(X)
    check_is_fitted(self, 'is_fitted_')
    return np.ones(X.shape[0], dtype=np.int64) * self.theta_
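# The predict above returns a constant label for every row: a vector of ones
# scaled by the fitted theta_. A minimal, self-contained sketch of the same
# pattern (the surrounding class and the way theta_ is learned are assumptions,
# not the library's implementation):
import numpy as np

class TSDummy:
    """Toy estimator mirroring the fragment above: predicts one constant label."""

    def fit(self, X, y):
        # hypothetical choice: remember the majority class as theta_
        values, counts = np.unique(y, return_counts=True)
        self.theta_ = values[np.argmax(counts)]
        self.is_fitted_ = True
        return self

    def predict(self, X):
        # same idiom as the fragment above
        return np.ones(X.shape[0], dtype=np.int64) * self.theta_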
class TSExampleClassifier(BaseClassifier):
    """An example classifier that makes use of the xpandas input."""

    def __init__(self, func=np.mean, columns=None, estimator=RandomForestClassifier()):
        self.func = func
        self.columns = columns
        self.estimator = estimator
    def fit(self, X, y):
        """A reference implementation of a fitting function.

        Parameters
        ----------
        X : array-like, pandas DataFrame or Series, shape (n_samples, ...)
            The training input samples.
        y : array-like, pandas DataFrame or Series, shape (n_samples,)
            The target class labels.
                get_gain=proximity.get_gain,
                verbosity=proximity.verbosity,
                n_jobs=proximity.n_jobs
            )
            # grow the stump
            stump.fit(proximity.X, proximity.y)
            stump.grow()
            stumps.append(stump)
        # pick the best stump based upon gain
        stump = comparison.max(stumps, proximity.random_state,
                               lambda stump: stump.entropy)
        return stump
    return find_best_stump
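# comparison.max above appears to pick the stump with the highest gain while
# breaking ties at random. A minimal sketch of such a helper (the name and the
# tie-breaking behaviour are assumptions inferred from the call site above):
import numpy as np

def max_by_key(items, random_state, key):
    """Return the item with the largest key(item), breaking ties randomly."""
    rng = np.random.RandomState(random_state) if isinstance(random_state, int) \
        else random_state
    best = max(key(item) for item in items)
    # collect every item tied for the best score, then pick one at random
    candidates = [item for item in items if key(item) == best]
    return candidates[rng.randint(len(candidates))]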
class ProximityStump(BaseClassifier):
    """
    Proximity Stump class to model a decision stump which uses a distance
    measure to partition data.

    Attributes:
        label_encoder: label encoder to change string labels to numeric indices
        y_exemplar: class label list of the exemplar instances
        X_exemplar: dataframe of the exemplar instances
        X_branches: dataframes for each branch, one per exemplar
        y_branches: class label list for each branch, one per exemplar
        classes_: unique list of classes
        entropy: the gain associated with the split of data
        random_state: the random state
        get_exemplars: function to extract exemplars from a dataframe and class value list
        setup_distance_measure: function to set up the distance measure getters from a dataframe and class value list
        get_distance_measure: getter for the distance measure
    """
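# The stump partitions instances by routing each one to the branch of its
# nearest exemplar. A minimal sketch of that idea under assumed names (a
# pluggable distance callable; exemplar selection is out of scope here):
import numpy as np

def partition_by_nearest_exemplar(X, y, X_exemplar, distance):
    """Assign each (instance, label) pair to the branch of its closest exemplar."""
    X_branches = [[] for _ in X_exemplar]
    y_branches = [[] for _ in X_exemplar]
    for instance, label in zip(X, y):
        distances = [distance(instance, exemplar) for exemplar in X_exemplar]
        branch = int(np.argmin(distances))  # nearest exemplar wins
        X_branches[branch].append(instance)
        y_branches[branch].append(label)
    return X_branches, y_branches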
import gc
import os
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.utils.multiclass import class_distribution
from sktime.classifiers.base import BaseClassifier
from sktime.utils.validation.supervised import validate_X
from tensorflow import keras
from sktime_dl.deeplearning import InceptionTimeClassifier
class DeepLearnerEnsembleClassifier(BaseClassifier):
    """
    Simplified/streamlined class to ensemble over homogeneous network
    architectures with different random initialisations.

    This may be refactored to use standard scikit-learn ensemble mechanisms
    in the future; it is currently somewhat bespoke for speed of
    implementation.

    Originally proposed by:

    @article{fawaz2019deep,
      title={Deep neural network ensembles for time series classification},
      author={Fawaz, H Ismail and Forestier, Germain and Weber, Jonathan and
              Idoumghar, Lhassane and Muller, P},
      journal={arXiv preprint arXiv:1903.06602},
      year={2019}
    }
    """
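# The ensembling idea from the docstring: train the same architecture several
# times with different random seeds and average the predicted probabilities.
# A minimal sketch assuming estimators with a predict_proba method (this is
# not this class's actual API):
import numpy as np

def ensemble_predict_proba(networks, X):
    """Average class probabilities over networks trained with different seeds."""
    probas = [network.predict_proba(X) for network in networks]
    # each element has shape (n_samples, n_classes); average elementwise
    return np.mean(probas, axis=0)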
    def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                 n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                 pre_dispatch='2*n_jobs', error_score='raise-deprecating',
                 return_train_score="warn"):
        super(GridSearchCV, self).__init__(
            estimator, param_grid, scoring=scoring, fit_params=fit_params,
            n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
            pre_dispatch=pre_dispatch, error_score=error_score,
            return_train_score=return_train_score)
        if self.scoring is None:
            # using accuracy score as default for classifiers
            if isinstance(self.estimator, BaseClassifier):
                self.scoring = make_scorer(accuracy_score)
            # using mean squared error as default for regressors
            elif isinstance(self.estimator, BaseRegressor):
                self.scoring = make_scorer(mean_squared_error)
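# make_scorer wraps a plain metric so that grid search can call it as
# scorer(estimator, X, y). A short, self-contained illustration of the same
# defaulting logic (using sklearn's is_classifier rather than the
# BaseClassifier check above):
from sklearn.base import is_classifier
from sklearn.metrics import accuracy_score, make_scorer, mean_squared_error

def default_scoring(estimator, scoring=None):
    """Pick a metric when none is given: accuracy for classifiers, MSE otherwise."""
    if scoring is not None:
        return scoring
    if is_classifier(estimator):
        return make_scorer(accuracy_score)
    # greater_is_better=False flips the sign so grid search still maximises
    return make_scorer(mean_squared_error, greater_is_better=False)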
from mlaut.shared.static_variables import (CLASSIFICATION,
                                           REGRESSION,
                                           GRIDSEARCH_NUM_CV_FOLDS,
                                           GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                                           VERBOSE)
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
from sktime.classifiers.base import BaseClassifier
from sktime.regressors.base import BaseRegressor
class Decision_Tree_Classifier(BaseClassifier):
    """
    Wrapper for the sklearn `DecisionTreeClassifier`.
    """

    def __init__(self, hyperparameters=None,
                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                 cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv
        if hyperparameters is None:
            self.hyperparameters = {"max_depth": [10, 100, None],
                                    "criterion": ['gini', 'entropy'],
                                    "max_features": ['auto', 'sqrt', 'log2'],
                                    "min_samples_leaf": np.arange(1, 11)}
        else:
            self.hyperparameters = hyperparameters
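# A hedged sketch of how a wrapper like this would typically fit its grid; the
# function name is illustrative, not the wrapper's real method:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

def fit_with_grid_search(hyperparameters, X, y, n_jobs=1, cv=5):
    """Exhaustively search the hyperparameter grid and return the fitted search."""
    search = GridSearchCV(DecisionTreeClassifier(),
                          param_grid=hyperparameters,
                          n_jobs=n_jobs,
                          cv=cv)
    search.fit(X, y)
    return search  # search.best_estimator_ holds the refitted best tree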
import numpy as np
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
from mlaut.shared.static_variables import GRIDSEARCH_CV_NUM_PARALLEL_JOBS, GRIDSEARCH_NUM_CV_FOLDS
from sktime.classifiers.base import BaseClassifier
class K_Neighbours(BaseClassifier):
    """
    Wrapper for the sklearn `KNeighborsClassifier`.
    """

    def __init__(self,
                 hyperparameters=None,
                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                 cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv
        # avoid the mutable default argument from the original signature
        if hyperparameters is None:
            hyperparameters = {
                'n_neighbors': np.arange(1, 31),
                'p': [1, 2]
            }
        self.hyperparameters = hyperparameters
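# The 'p' entry in the grid selects the order of the Minkowski metric used by
# KNeighborsClassifier: p=1 is Manhattan distance, p=2 is Euclidean. A tiny
# illustration:
import numpy as np

a, b = np.array([0.0, 0.0]), np.array([3.0, 4.0])
manhattan = np.sum(np.abs(a - b))          # p=1 -> 7.0
euclidean = np.sqrt(np.sum((a - b) ** 2))  # p=2 -> 5.0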
        indices = np.ravel(indices)
        sub_tree = self.branches[index]
        if sub_tree is None:
            # a leaf branch: all probability mass on the exemplar's class
            sub_distribution = np.zeros((1, n_classes))
            class_label = self.stump.y_exemplar[index]
            sub_distribution[0][class_label] = 1
        else:
            # recurse into the sub-tree with the instances routed to this branch
            sub_X = X.iloc[indices, :]
            sub_distribution = sub_tree.predict_proba(sub_X)
        assert sub_distribution.shape[1] == n_classes
        np.add.at(distribution, indices, sub_distribution)
    normalize(distribution, copy=False, norm='l1')
    return distribution
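# normalize(..., norm='l1') rescales each row so its absolute values sum to 1,
# turning the accumulated votes into one probability distribution per instance:
import numpy as np
from sklearn.preprocessing import normalize

votes = np.array([[2.0, 1.0, 1.0],
                  [0.0, 3.0, 0.0]])
normalize(votes, copy=False, norm='l1')
# votes is now [[0.5, 0.25, 0.25], [0.0, 1.0, 0.0]]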
class ProximityForest(BaseClassifier):
    """
    Proximity Forest class to model a decision tree forest which uses
    distance measures to partition data.

    @article{lucas19proximity,
      title={Proximity Forest: an effective and scalable distance-based
             classifier for time series},
      author={B. Lucas and A. Shifaz and C. Pelletier and L. O’Neill and
              N. Zaidi and B. Goethals and F. Petitjean and G. Webb},
      journal={Data Mining and Knowledge Discovery},
      volume={33},
      number={3},
      pages={607--635},
      year={2019}
    }
    """
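# Stump quality in these fragments is scored by gain (see comparison.max over
# stump.entropy earlier). A minimal sketch of an entropy-based information
# gain, assuming plain label arrays (the library's own gain function may
# differ):
import numpy as np

def entropy(y):
    """Shannon entropy of a label array."""
    _, counts = np.unique(y, return_counts=True)
    p = counts / counts.sum()
    return -np.sum(p * np.log2(p))

def gain(y_parent, y_branches):
    """Entropy reduction achieved by splitting y_parent into y_branches."""
    n = len(y_parent)
    weighted = sum(len(y_b) / n * entropy(np.asarray(y_b)) for y_b in y_branches)
    return entropy(np.asarray(y_parent)) - weighted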
from sklearn.model_selection import GridSearchCV
from mlaut.shared.static_variables import (GRIDSEARCH_NUM_CV_FOLDS,
                                           GRIDSEARCH_CV_NUM_PARALLEL_JOBS)
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
import numpy as np
from sktime.classifiers.base import BaseClassifier
from sktime.regressors.base import BaseRegressor
class Random_Forest_Classifier(BaseClassifier):
    """
    Wrapper for the sklearn `RandomForestClassifier`.
    """

    def __init__(self, hyperparameters=None,
                 n_jobs=GRIDSEARCH_CV_NUM_PARALLEL_JOBS,
                 cv=GRIDSEARCH_NUM_CV_FOLDS):
        self.fitted_classifier = None
        self.n_jobs = n_jobs
        self.cv = cv
        if hyperparameters is None:
            self.hyperparameters = {"max_depth": [10, 100, None],
                                    "max_features": ['auto', 'sqrt', 'log2', None],
                                    "min_samples_split": [2, 3, 10],
                                    "bootstrap": [True, False],
                                    "criterion": ["gini", "entropy"],
                                    "n_estimators": [10, 100, 200, 500]}
        else:
            self.hyperparameters = hyperparameters
import numpy as np
import random
import sys
import pandas as pd
import time
import math
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.utils.multiclass import class_distribution
from sktime.transformers.shapelets import ContractedShapeletTransform
from sktime.classifiers.base import BaseClassifier
class ShapeletTransformClassifier(BaseClassifier):
    """Shapelet Transform Classifier

    Basic implementation along the lines of:

    @article{hills14shapelet,
      title={Classification of time series by shapelet transformation},
      author={J. Hills and J. Lines and E. Baranauskas and J. Mapp and A. Bagnall},
      journal={Data Mining and Knowledge Discovery},
      volume={28},
      number={4},
      pages={851--881},
      year={2014}
    }

    but with some of the refinements presented in:

    @article{bostrom17binary,
      author={A. Bostrom and A. Bagnall},
      title={Binary Shapelet Transform for Multiclass Time Series Classification},
    def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
                 n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
                 pre_dispatch='2*n_jobs', error_score='raise-deprecating',
                 return_train_score="warn"):
        # pass the received arguments through rather than re-hardcoding defaults
        super().__init__(estimator, param_grid, scoring=scoring, fit_params=fit_params,
                         n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
                         pre_dispatch=pre_dispatch, error_score=error_score,
                         return_train_score=return_train_score)
        if self.scoring is None:
            # using accuracy score as default for classifiers
            if isinstance(self.estimator, BaseClassifier):
                self.scoring = make_scorer(accuracy_score)
            # using mean squared error as default for regressors
            elif isinstance(self.estimator, BaseRegressor):
                self.scoring = make_scorer(mean_squared_error)