How to use pyts - 10 common examples

To help you get started, we’ve selected a few pyts examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github johannfaouzi / pyts / pyts / transformation / transformation.py View on Github external
for i in range(n_windows)])
            X_window = X_window.reshape(n_samples * n_windows, -1, order='F')
        else:
            n_windows = n_features // self.window_size
            remainder = n_features % self.window_size
            if remainder == 0:
                window_idx = np.array_split(np.arange(0, n_features),
                                            n_windows)
            else:
                split_idx = np.arange(self.window_size,
                                      n_windows * (self.window_size + 1),
                                      self.window_size)
                window_idx = np.split(np.arange(0, n_features), split_idx)[:-1]
            X_window = X[:, window_idx].reshape(n_samples * n_windows, -1)

        sfa = SFA(self.n_coefs, False, self.norm_mean,
                  self.norm_std, self.n_bins, self.quantiles,
                  self.variance_selection, self.variance_threshold)
        count = CountVectorizer(ngram_range=(1, 1))

        X_sfa = sfa.fit_transform(X_window)
        X_sfa = np.apply_along_axis(lambda x: ''.join(x),
                                    1,
                                    X_sfa).reshape(n_samples, -1)
        word_size = len(X_sfa[0, 0])
        if word_size == 1:
            count.set_params(tokenizer=self._tok)
        if self.numerosity_reduction:
            X_sfa = np.apply_along_axis(numerosity_reduction, 1, X_sfa)
        else:
            X_sfa = np.apply_along_axis(lambda x: ' '.join(x), 1, X_sfa)
        count.fit(X_sfa)
github johannfaouzi / pyts / pyts / transformation / transformation.py View on Github external
order='F')
            else:
                # Non-overlapping windows of ``window_size`` timestamps.
                n_windows = n_features // window_size
                remainder = n_features % window_size
                if remainder == 0:
                    window_idx = np.array_split(np.arange(0, n_features),
                                                n_windows)
                else:
                    # Split points at every multiple of window_size up to
                    # n_windows * window_size; the trailing remainder chunk
                    # (shorter than a full window) is dropped below.
                    split_idx = np.arange(window_size,
                                          (n_windows + 1) * window_size,
                                          window_size)
                    window_idx = np.split(np.arange(0, n_features),
                                          split_idx)[:-1]
                X_window = X[:, window_idx].reshape(n_samples * n_windows, -1)

            # Supervised SFA (entropy-based binning) followed by word counts
            # over unigrams and bigrams.
            sfa = SFA(self.n_coefs, True, self.norm_mean,
                      self.norm_std, self.n_bins, 'entropy',
                      self.variance_selection, self.variance_threshold)
            count = CountVectorizer(ngram_range=(1, 2))

            # Every window inherits the label of the sample it came from.
            y_window = np.repeat(y_ind, n_windows)
            X_sfa = sfa.fit_transform(X_window, y_window)
            # Join each window's symbols into a single word per window.
            X_sfa = np.apply_along_axis(lambda x: ''.join(x),
                                        1,
                                        X_sfa).reshape(n_samples, -1)
            word_size = len(X_sfa[0, 0])
            if word_size == 1:
                # CountVectorizer's default token pattern drops 1-character
                # tokens; use the custom tokenizer for one-letter words.
                count.set_params(tokenizer=self._tok)
            X_sfa = np.apply_along_axis(lambda x: ' '.join(x), 1, X_sfa)

            tf = count.fit_transform(X_sfa)
            # Chi-squared test: keep only words that discriminate classes.
            _, pval = chi2(tf, y_ind)
github johannfaouzi / pyts / pyts / transformation / transformation.py View on Github external
for i in range(n_windows)])
            X_window = X_window.reshape(n_samples * n_windows, -1, order='F')
        else:
            n_windows = n_features // self.window_size
            remainder = n_features % self.window_size
            if remainder == 0:
                window_idx = np.array_split(np.arange(0, n_features),
                                            n_windows)
            else:
                split_idx = np.arange(self.window_size,
                                      n_windows * (self.window_size + 1),
                                      self.window_size)
                window_idx = np.split(np.arange(0, n_features), split_idx)[:-1]
            X_window = X[:, window_idx].reshape(n_samples * n_windows, -1)

        sfa = SFA(self.n_coefs, False, self.norm_mean,
                  self.norm_std, self.n_bins, self.quantiles,
                  self.variance_selection, self.variance_threshold)
        count = CountVectorizer(ngram_range=(1, 1))

        X_sfa = sfa.fit_transform(X_window)
        X_sfa = np.apply_along_axis(lambda x: ''.join(x),
                                    1,
                                    X_sfa).reshape(n_samples, -1)
        word_size = len(X_sfa[0, 0])
        if word_size == 1:
            count.set_params(tokenizer=self._tok)
        if self.numerosity_reduction:
            X_sfa = np.apply_along_axis(numerosity_reduction, 1, X_sfa)
        else:
            X_sfa = np.apply_along_axis(lambda x: ' '.join(x), 1, X_sfa)
        tf = count.fit_transform(X_sfa)
github johannfaouzi / pyts / pyts / classification / classification.py View on Github external
for i in range(n_windows)])
            X_window = X_window.reshape(n_samples * n_windows, -1, order='F')
        else:
            n_windows = n_features // self.window_size
            remainder = n_features % self. window_size
            if remainder == 0:
                window_idx = np.array_split(np.arange(0, n_features),
                                            n_windows)
            else:
                split_idx = np.arange(self.window_size,
                                      n_windows * (self.window_size + 1),
                                      self.window_size)
                window_idx = np.split(np.arange(0, n_features), split_idx)[:-1]
            X_window = X[:, window_idx].reshape(n_samples * n_windows, -1)

        sfa = SFA(self.n_coefs, False, self.norm_mean,
                  self.norm_std, self.n_bins, self.quantiles,
                  self.variance_selection, self.variance_threshold)
        tfidf = TfidfVectorizer(ngram_range=(1, 1), smooth_idf=self.smooth_idf,
                                sublinear_tf=self.sublinear_tf)

        X_sfa = sfa.fit_transform(X_window)
        X_sfa = np.apply_along_axis(lambda x: ''.join(x),
                                    1,
                                    X_sfa).reshape(n_samples, -1)
        word_size = len(X_sfa[0, 0])
        if word_size == 1:
            tfidf.set_params(tokenizer=self._tok)
        if self.numerosity_reduction:
            X_sfa = np.apply_along_axis(numerosity_reduction, 1, X_sfa)
        else:
            X_sfa = np.apply_along_axis(lambda x: ' '.join(x), 1, X_sfa)
github johannfaouzi / pyts / pyts / transformation / boss.py View on Github external
Returns
        -------
        X_new : sparse matrix, shape = (n_samples, n_words)
            Document-term matrix.

        """
        X = check_array(X)
        n_samples, n_timestamps = X.shape
        if y is not None:
            check_classification_targets(y)

        # Number of sliding windows extracted from each time series.
        window_size, window_step = self._check_params(n_timestamps)
        n_windows = (n_timestamps - window_size + window_step) // window_step

        # Flatten the (sample, window) pairs into rows for SFA.
        X_windowed = _windowed_view(
            X, n_samples, n_timestamps, window_size, window_step
        )
        X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
            norm_mean=self.norm_mean, norm_std=self.norm_std,
            n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
        )
        # When labels are available, each window inherits the label of the
        # sample it was extracted from (used for supervised binning).
        if y is None:
            y_repeated = None
        else:
            y_repeated = np.repeat(y, n_windows)
        X_sfa = sfa.fit_transform(X_windowed, y_repeated)

        # Join each window's symbols into a single word.
        X_word = np.asarray([''.join(X_sfa[i])
github johannfaouzi / pyts / pyts / classification / bossvs.py View on Github external
-------
        self : object

        """
        X, y = check_X_y(X, y)
        n_samples, n_timestamps = X.shape
        check_classification_targets(y)
        # Encode labels as integers 0..n_classes-1.
        le = LabelEncoder()
        y_ind = le.fit_transform(y)
        self.classes_ = le.classes_
        n_classes = self.classes_.size

        # Number of sliding windows extracted from each time series.
        window_size, window_step = self._check_params(n_timestamps)
        n_windows = (n_timestamps - window_size + window_step) // window_step

        # Flatten the (sample, window) pairs into rows for SFA.
        X_windowed = _windowed_view(
            X, n_samples, n_timestamps, window_size, window_step
        )
        X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
            norm_mean=self.norm_mean, norm_std=self.norm_std,
            n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
        )
        # Each window inherits the label of the sample it was extracted from.
        y_repeated = np.repeat(y, n_windows)
        X_sfa = sfa.fit_transform(X_windowed, y_repeated)

        # Join each window's symbols into a single word, then regroup the
        # words per original sample.
        X_word = np.asarray([''.join(X_sfa[i])
                             for i in range(n_samples * n_windows)])
        X_word = X_word.reshape(n_samples, n_windows)
github johannfaouzi / pyts / pyts / bag_of_words / bow.py View on Github external
----------
        X : array-like, shape = (n_samples, n_timestamps)

        Returns
        -------
        X_new : array, shape = (n_samples,)
            Transformed data. Each row is a string consisting of words
            separated by a whitespace.

        """
        X = check_array(X, dtype=None)
        n_samples, n_timestamps = X.shape
        # Number of sliding windows extracted from each time series.
        window_size, window_step = self._check_params(n_timestamps)
        n_windows = (n_timestamps - window_size + window_step) // window_step

        # One word per window: join the symbols inside each window.
        X_window = _windowed_view(X, n_samples, n_timestamps,
                                  window_size, window_step)
        X_word = np.asarray([[''.join(X_window[i, j])
                              for j in range(n_windows)]
                             for i in range(n_samples)])

        if self.numerosity_reduction:
            # Keep a word only when it differs from its predecessor (the
            # last word of each row is always kept).
            not_equal = np.c_[X_word[:, 1:] != X_word[:, :-1],
                              np.full(n_samples, True)]
            X_bow = np.asarray([' '.join(X_word[i, not_equal[i]])
                                for i in range(n_samples)])
        else:
            X_bow = np.asarray([' '.join(X_word[i]) for i in range(n_samples)])
        return X_bow
github johannfaouzi / pyts / pyts / transformation / weasel.py View on Github external
check_is_fitted(self, ['_relevant_features_list', '_sfa_list',
                               '_vectorizer_list', 'vocabulary_'])

        X = check_array(X, dtype='float64')
        n_samples, n_timestamps = X.shape

        X_features = coo_matrix((n_samples, 0), dtype=np.int64)

        for (window_size, window_step, sfa,
             vectorizer, relevant_features) in zip(
                 self._window_sizes, self._window_steps, self._sfa_list,
                 self._vectorizer_list, self._relevant_features_list):

            n_windows = ((n_timestamps - window_size + window_step)
                         // window_step)
            X_windowed = _windowed_view(
                X, n_samples, n_timestamps, window_size, window_step
            )
            X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)
            X_sfa = sfa.transform(X_windowed)

            X_word = np.asarray([''.join(X_sfa[i])
                                 for i in range(n_samples * n_windows)])
            X_word = X_word.reshape(n_samples, n_windows)
            X_bow = np.asarray([' '.join(X_word[i]) for i in range(n_samples)])
            X_counts = vectorizer.transform(X_bow)[:, relevant_features]
            X_features = hstack([X_features, X_counts])

        if not self.sparse:
            return X_features.A
        return csr_matrix(X_features)
github johannfaouzi / pyts / pyts / transformation / boss.py View on Github external
Class labels for each data sample.

        Returns
        -------
        self : object

        """
        X = check_array(X)
        n_samples, n_timestamps = X.shape
        if y is not None:
            check_classification_targets(y)

        # Number of sliding windows extracted from each time series.
        window_size, window_step = self._check_params(n_timestamps)
        n_windows = (n_timestamps - window_size + window_step) // window_step

        # Flatten the (sample, window) pairs into rows for SFA.
        X_windowed = _windowed_view(
            X, n_samples, n_timestamps, window_size, window_step
        )
        X_windowed = X_windowed.reshape(n_samples * n_windows, window_size)

        sfa = SymbolicFourierApproximation(
            n_coefs=self.word_size, drop_sum=self.drop_sum, anova=self.anova,
            norm_mean=self.norm_mean, norm_std=self.norm_std,
            n_bins=self.n_bins, strategy=self.strategy, alphabet=self.alphabet
        )
        # When labels are available, each window inherits the label of the
        # sample it was extracted from (used for supervised binning).
        if y is None:
            y_repeated = None
        else:
            y_repeated = np.repeat(y, n_windows)
        X_sfa = sfa.fit_transform(X_windowed, y_repeated)

        # Join each window's symbols into a single word.
        X_word = np.asarray([''.join(X_sfa[i])
github johannfaouzi / pyts / examples / bag_of_words / plot_bow.py View on Github external
of the sliding window is equal to the size of the sliding window, making the
subseries non-overlapping. It is common to use a step of 1 for the sliding
window, which is the default behavior. It is implemented as
:class:`pyts.bag_of_words.BagOfWords`.
"""

# Author: Johann Faouzi 
# License: BSD-3-Clause

import matplotlib.pyplot as plt
import numpy as np
from pyts.bag_of_words import BagOfWords
from pyts.datasets import load_gunpoint

# Load the dataset and perform the transformation
X, _, _, _ = load_gunpoint(return_X_y=True)
window_size, word_size = 30, 5
# window_step == window_size makes the subseries non-overlapping.
bow = BagOfWords(window_size=window_size, word_size=word_size,
                 window_step=window_size, numerosity_reduction=False)
X_bow = bow.transform(X)

# Plot the considered subseries
plt.figure(figsize=(10, 4))
# Boundaries of each non-overlapping window along the time axis.
splits_series = np.linspace(0, X.shape[1], 1 + X.shape[1] // window_size,
                            dtype='int64')
# End each segment one point past the split so adjacent segments connect
# visually (clipped so the last segment stays within the series).
for start, end in zip(splits_series[:-1],
                      np.clip(splits_series[1:] + 1, 0, X.shape[1])):
    plt.plot(np.arange(start, end), X[0, start:end], 'o-', lw=1, ms=1)

# Plot the corresponding letters
splits_letters = np.linspace(0, X.shape[1],
                             1 + word_size * X.shape[1] // window_size)