Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _fit(self, X, y=None):
    """Fit the 1d-SAX representation: base SAX breakpoints plus slope breakpoints.

    Delegates the average-value part to the parent SAX fit, then derives
    Gaussian breakpoints (and bin medians) for the slope alphabet, using
    either the user-supplied ``sigma_l`` or a segment-length heuristic.
    """
    SymbolicAggregateApproximation._fit(self, X, y)
    _, series_length, _ = X.shape
    segment_length = series_length // self.n_segments
    slope_scale = self.sigma_l
    if slope_scale is None:
        # Default slope standard deviation heuristic when none is given.
        slope_scale = numpy.sqrt(0.03 / segment_length)
    self.breakpoints_slope_ = _breakpoints(self.alphabet_size_slope,
                                           scale=slope_scale)
    self.breakpoints_slope_middle_ = _bin_medians(self.alphabet_size_slope,
                                                  scale=slope_scale)
    return self
def row_col(position, n_cols=5):
    """Convert a 1-based subplot position into zero-based (row, col) indices.

    Positions are numbered left-to-right, top-to-bottom across a grid with
    ``n_cols`` columns, matching matplotlib's subplot numbering.
    """
    zero_based = position - 1
    return zero_based // n_cols, zero_based % n_cols
def get_color(weights):
    """Blend red, green, blue and yellow according to ``weights``.

    ``weights`` is a length-4 sequence; the result is the weighted sum of
    the four base RGB colors, returned as a flat length-3 array.
    """
    converter = matplotlib.colors.ColorConverter()
    base_rgb = numpy.array([converter.to_rgb(name)
                            for name in ("r", "g", "b", "y")])
    weight_row = numpy.array(weights).reshape(1, 4)
    return numpy.dot(weight_row, base_rgb).ravel()
# Demo script: plot one representative series per class of the Trace dataset.
numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# One exemplar per class (Trace labels are 1..4).
X_out = numpy.empty((4, X_train.shape[1], X_train.shape[2]))
plt.figure()
for i in range(4):
# First training series whose label is class i + 1.
X_out[i] = X_train[y_train == (i + 1)][0]
X_out = TimeSeriesScalerMinMax().fit_transform(X_out)
# Place the four exemplars in the corners of a 5x5 subplot grid.
for i, pos in enumerate([1, 5, 21, 25]):
plt.subplot(5, 5, pos)
# One-hot weight vector -> one pure base color per class.
w = [0.] * 4
w[i] = 1.
plt.plot(X_out[i].ravel(),
color=matplotlib.colors.rgb2hex(get_color(w)),
linewidth=2)
# NOTE(review): this call is truncated in this chunk; the remaining
# keyword arguments are outside the visible source.
plt.text(X_out[i].shape[0], 0., "$X_%d$" % i,
horizontalalignment="right",
Parameters
----------
X : array-like, shape (n_ts, sz, d)
Training data.
y : array-like, shape (n_ts, )
Target values.
"""
# Variable-length metrics (e.g. DTW) cannot use sklearn's vectorized
# neighbors machinery directly: remember the requested metric and
# switch to a "precomputed"-distance scheme instead.
if self.metric in VARIABLE_LENGTH_METRICS:
self._ts_metric = self.metric
self.metric = "precomputed"
# Do not force finite values for "precomputed", since the matrix is a
# placeholder in that case (see below).
X = check_array(X,
allow_nd=True,
force_all_finite=(self.metric != "precomputed"))
X = to_time_series_dataset(X)
X = check_dims(X, X_fit=None)
if self.metric == "precomputed" and hasattr(self, '_ts_metric'):
# Keep the raw series; a dummy square zero matrix is handed to
# sklearn, actual distances being computed elsewhere at predict time.
self._ts_fit = X
self._d = X.shape[2]
self._X_fit = numpy.zeros((self._ts_fit.shape[0],
self._ts_fit.shape[0]))
else:
# Fixed-length case: flatten series into a 2d sklearn-style dataset.
self._X_fit, self._d = to_sklearn_dataset(X, return_dim=True)
super(KNeighborsTimeSeriesClassifier, self).fit(self._X_fit, y)
# Restore the user-visible metric attribute that was overwritten above.
if hasattr(self, '_ts_metric'):
self.metric = self._ts_metric
return self
(5, 3)
"""
# Return all model weights, or only those of the named Keras layer.
if layer_name is None:
return self.model.get_weights()
else:
return self.model.get_layer(layer_name).get_weights()
# Demo: learn shapelets on the Trace dataset and time the training.
if __name__ == "__main__":
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
import time
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# Standardize each series (zero mean, unit variance) before learning.
X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train)
X_test = TimeSeriesScalerMeanVariance().fit_transform(X_test)
ts_sz = X_train.shape[1]
l, r = 0.1, 2 # Taken (for dataset Trace) from the Table at:
# http://fs.ismll.de/publicspace/LearningShapelets/
n_classes = len(set(y_train))
# Heuristic from Grabocka et al. for the number/length of shapelets.
n_shapelets_per_size = grabocka_params_to_shapelet_size_dict(ts_sz, n_classes, l, r)
t0 = time.time()
clf = ShapeletModel(n_shapelets_per_size=n_shapelets_per_size,
max_iter=1000,
optimizer=RMSprop(lr=.001),
weight_regularizer=.01,
verbose_level=0)
clf.fit(X_train, y_train)
print("Total time for training: %fs" % (time.time() - t0))
print([shp.shape for shp in clf.shapelets_])
>>> clf = ShapeletModel(n_shapelets_per_size={10: 5}, max_iter=1, verbose_level=0)
>>> clf.fit(X, y).get_weights("softmax")[0].shape
(5, 3)
"""
# Return all model weights, or only those of the named Keras layer.
if layer_name is None:
return self.model.get_weights()
else:
return self.model.get_layer(layer_name).get_weights()
# Demo: learn shapelets on the Trace dataset (duplicate of the block above;
# truncated in this chunk after clf.fit).
if __name__ == "__main__":
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
import time
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# Standardize each series (zero mean, unit variance) before learning.
X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train)
X_test = TimeSeriesScalerMeanVariance().fit_transform(X_test)
ts_sz = X_train.shape[1]
l, r = 0.1, 2 # Taken (for dataset Trace) from the Table at:
# http://fs.ismll.de/publicspace/LearningShapelets/
n_classes = len(set(y_train))
# Heuristic from Grabocka et al. for the number/length of shapelets.
n_shapelets_per_size = grabocka_params_to_shapelet_size_dict(ts_sz, n_classes, l, r)
t0 = time.time()
clf = ShapeletModel(n_shapelets_per_size=n_shapelets_per_size,
max_iter=1000,
optimizer=RMSprop(lr=.001),
weight_regularizer=.01,
verbose_level=0)
clf.fit(X_train, y_train)
explained in detail in "Fast global alignment kernels", by M. Cuturi
(ICML 2011).
"""
# Author: Romain Tavenard
# License: BSD 3 clause
import numpy
import matplotlib.pyplot as plt
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.svm import TimeSeriesSVC
# Demo: SVM classification of Trace with the Global Alignment Kernel (GAK).
numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# Rescale each series to [0, 1]; GAK is sensitive to amplitude.
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)
clf = TimeSeriesSVC(kernel="gak",
gamma=.1)
clf.fit(X_train, y_train)
print("Correct classification rate:", clf.score(X_test, y_test))
n_classes = len(set(y_train))
plt.figure()
# One subplot per class, showing that class's support vectors.
support_vectors = clf.support_vectors_time_series_(X_train)
for i, cl in enumerate(set(y_train)):
plt.subplot(n_classes, 1, i + 1)
plt.title("Support vectors for class %d" % (cl))
# NOTE(review): the loop body is truncated in this chunk.
for ts in support_vectors[i]:
def fit(self, X):
# Compute the barycenter of the dataset X: initialize with a Euclidean
# barycenter, then (optionally) refine it by numerical optimization.
self._X_fit = to_time_series_dataset(X)
self.weights = _set_weights(self.weights, self._X_fit.shape[0])
if self.barycenter_ is None:
if check_equal_size(self._X_fit):
self.barycenter_ = EuclideanBarycenter.fit(self,
self._X_fit)
else:
# Unequal-length series: resample everything to a common length
# before taking the Euclidean barycenter.
resampled_X = TimeSeriesResampler(
sz=self._X_fit.shape[1]).fit_transform(self._X_fit)
self.barycenter_ = EuclideanBarycenter.fit(self,
resampled_X)
if self.max_iter > 0:
# The function works with vectors so we need to vectorize
# barycenter_.
res = minimize(self._func, self.barycenter_.ravel(),
method=self.method, jac=True, tol=self.tol,
options=dict(maxiter=self.max_iter, disp=False))
return res.x.reshape(self.barycenter_.shape)
# NOTE(review): the max_iter <= 0 branch is not visible in this chunk;
# presumably the initial barycenter is returned there — confirm upstream.
else:
# Multiple restarts: keep the centroids that achieve the lowest
# inertia across up to max_attempts initializations.
best_correct_centroids = None
min_inertia = numpy.inf
n_successful = 0
n_attempts = 0
while n_successful < self.n_init and n_attempts < max_attempts:
try:
if self.verbose and self.n_init > 1:
print("Init %d" % (n_successful + 1))
n_attempts += 1
self._fit_one_init(X_, rs)
if self.inertia_ < min_inertia:
best_correct_centroids = self.cluster_centers_.copy()
min_inertia = self.inertia_
self.n_iter_ = self._iter
n_successful += 1
except EmptyClusterError:
# An init that yields an empty cluster is discarded and retried
# (it does not count toward n_successful).
if self.verbose:
print("Resumed because of empty cluster")
self._norms_centroids = numpy.linalg.norm(self.cluster_centers_,
axis=(1, 2))
# Restore the best run's centroids/inertia onto the estimator.
self._post_fit(X_, best_correct_centroids, min_inertia)
return self
# Nearest neighbor classification
# 2. k-NN (k=3) with DTW as the distance between series.
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="dtw")
knn_clf.fit(X_train, y_train)
predicted_labels = knn_clf.predict(X_test)
print("\n2. Nearest neighbor classification using DTW")
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))
# Nearest neighbor classification with a different metric (Euclidean distance)
# 3. Same classifier with plain L2 distance for comparison.
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="euclidean")
knn_clf.fit(X_train, y_train)
predicted_labels = knn_clf.predict(X_test)
print("\n3. Nearest neighbor classification using L2")
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))
# Nearest neighbor classification based on SAX representation
# 4. SAX transform chained with k-NN in a sklearn Pipeline.
sax_trans = SymbolicAggregateApproximation(n_segments=10, alphabet_size_avg=5)
knn_clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="euclidean")
pipeline_model = Pipeline(steps=[('sax', sax_trans), ('knn', knn_clf)])
pipeline_model.fit(X_train, y_train)
predicted_labels = pipeline_model.predict(X_test)
print("\n4. Nearest neighbor classification using SAX+MINDIST")
print("Correct classification rate:", accuracy_score(y_test, predicted_labels))
def _kmeans_init_shapelets(X, n_shapelets, shp_len, n_draw=10000):
    """Initialize shapelets as k-means centroids of random subseries.

    Draws ``n_draw`` random windows of length ``shp_len`` from the dataset
    ``X`` of shape (n_ts, sz, d) and returns the cluster centers of a
    Euclidean k-means fit on those windows.
    """
    n_ts, sz, d = X.shape
    # Random (series, start-time) pairs; both drawn with replacement.
    series_idx = numpy.random.choice(n_ts, size=n_draw, replace=True)
    start_idx = numpy.random.choice(sz - shp_len + 1, size=n_draw,
                                    replace=True)
    windows = numpy.zeros((n_draw, shp_len, d))
    for k, (ts, t0) in enumerate(zip(series_idx, start_idx)):
        windows[k] = X[ts, t0:t0 + shp_len]
    km = TimeSeriesKMeans(n_clusters=n_shapelets,
                          metric="euclidean",
                          verbose=False)
    return km.fit(windows).cluster_centers_