Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
class LinearTrend(AggregationPrimitive):
    """Fit a linear least-squares regression of the series values against
    the positions 0 .. len(series) - 1 and return one fitted attribute.

    The signal is assumed to be uniformly sampled; the time stamps are not
    used to fit the model.

    Args:
        attr (str) : Selects which regression characteristic is returned.
            Possible extracted attributes are:
            ['pvalue', 'rvalue', 'intercept', 'slope', 'stderr'].

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.linear_trend
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "linear_trend"

    def __init__(self, attr):
        self.attr = attr

    def get_function(self):
        """Return a callable that computes the configured attribute for a series."""
        def function(x):
            # linear_trend takes a list of parameter dicts and yields one
            # (label, value) pair per dict; only one dict is passed here.
            settings = [{'attr': self.attr}]
            pairs = list(linear_trend(x, settings))
            first_pair = pairs[0]
            return first_pair[1]

        return function
from featuretools.primitives import AggregationPrimitive
from featuretools.variable_types import Numeric
from tsfresh.feature_extraction.feature_calculators import maximum
class Maximum(AggregationPrimitive):
    """Aggregation primitive returning the highest value of the time series x.

    The computation is delegated unchanged to tsfresh's ``maximum``.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.maximum
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "maximum"

    def get_function(self):
        """Return the tsfresh calculator itself; no wrapping is needed."""
        calculator = maximum
        return calculator
from featuretools.variable_types import Numeric
from tsfresh.feature_extraction.feature_calculators import quantile
class Quantile(AggregationPrimitive):
    """Aggregation primitive computing the q quantile of x, i.e. the value
    of x greater than q% of the ordered values from x.

    Args:
        q (float) : The quantile to calculate.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.quantile
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "quantile"

    def __init__(self, q):
        self.q = q

    def get_function(self):
        """Return a callable that applies tsfresh's ``quantile`` with ``self.q``."""
        def function(x):
            # self.q is read at call time, not when get_function() runs.
            level = self.q
            return quantile(x, q=level)

        return function
from featuretools.primitives import AggregationPrimitive
from featuretools.variable_types import Numeric
from tsfresh.feature_extraction.feature_calculators import count_below_mean
class CountBelowMean(AggregationPrimitive):
    """Aggregation primitive counting the values in x that are lower than
    the mean of x.

    The computation is delegated unchanged to tsfresh's ``count_below_mean``.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.count_below_mean
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "count_below_mean"

    def get_function(self):
        """Return the tsfresh calculator itself; no wrapping is needed."""
        calculator = count_below_mean
        return calculator
# NOTE(review): this is an excerpt from the middle of a type-inference
# routine; the enclosing function and the `if` that the first `elif` below
# pairs with are not part of this excerpt.
# catch cases where object dtype cannot be interpreted as a string
# (on AttributeError, inferred_type is simply not reassigned here)
try:
avg_length = sample.str.len().mean()
# values averaging more than 50 characters are treated as free text
if avg_length > 50:
inferred_type = vtypes.Text
except AttributeError:
pass
# boolean dtype -> Boolean variable
elif df[variable].dtype == "bool":
inferred_type = vtypes.Boolean
# pandas categorical dtype -> Categorical variable
elif pdtypes.is_categorical_dtype(df[variable].dtype):
inferred_type = vtypes.Categorical
# any numeric dtype -> Numeric variable
elif pdtypes.is_numeric_dtype(df[variable].dtype):
inferred_type = vtypes.Numeric
# columns accepted by col_is_datetime -> Datetime variable
elif col_is_datetime(df[variable]):
inferred_type = vtypes.Datetime
# fallback for any remaining non-empty column
elif len(df[variable]):
# draw at most 10000 rows, capped by the number of distinct values
# (NaN counted as a value)
sample = df[variable] \
.sample(min(10000, df[variable].nunique(dropna=False)))
unique = sample.unique()
# NOTE(review): this is sample size divided by distinct count, so it is
# always >= 1 and the `< .05` test below looks unreachable; the intended
# ratio was presumably len(unique) / sample.size -- confirm before changing.
percent_unique = sample.size / len(unique)
if percent_unique < .05:
inferred_type = vtypes.Categorical
else:
inferred_type = vtypes.Numeric
from featuretools.primitives import AggregationPrimitive
from featuretools.variable_types import Boolean, Numeric
from tsfresh.feature_extraction.feature_calculators import has_duplicate_min
class HasDuplicateMin(AggregationPrimitive):
    """Aggregation primitive checking whether the minimal value of x is
    observed more than once.

    The computation is delegated unchanged to tsfresh's ``has_duplicate_min``.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.has_duplicate_min
    """
    stack_on_self = False
    return_type = Boolean
    input_types = [Numeric]
    name = "has_duplicate_min"

    def get_function(self):
        """Return the tsfresh calculator itself; no wrapping is needed."""
        calculator = has_duplicate_min
        return calculator
from featuretools.primitives import AggregationPrimitive
from featuretools.variable_types import Numeric
from tsfresh.feature_extraction.feature_calculators import \
longest_strike_below_mean
class LongestStrikeBelowMean(AggregationPrimitive):
    """Aggregation primitive returning the length of the longest consecutive
    subsequence in x that is smaller than the mean of x.

    The computation is delegated unchanged to tsfresh's
    ``longest_strike_below_mean``.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.longest_strike_below_mean
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "longest_strike_below_mean"

    def get_function(self):
        """Return the tsfresh calculator itself; no wrapping is needed."""
        calculator = longest_strike_below_mean
        return calculator
from tsfresh.feature_extraction.feature_calculators import sample_entropy
class SampleEntropy(AggregationPrimitive):
    """Aggregation primitive that calculates and returns the sample entropy
    of x.

    The computation is delegated unchanged to tsfresh's ``sample_entropy``.

    .. rubric:: References

    |  [1] http://en.wikipedia.org/wiki/Sample_Entropy
    |  [2] https://www.ncbi.nlm.nih.gov/pubmed/10843903?dopt=Abstract

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.sample_entropy
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "sample_entropy"

    def get_function(self):
        """Return the tsfresh calculator itself; no wrapping is needed."""
        calculator = sample_entropy
        return calculator
from featuretools.variable_types import Numeric
from tsfresh.feature_extraction.feature_calculators import ratio_beyond_r_sigma
class RatioBeyondRSigma(AggregationPrimitive):
    """Aggregation primitive giving the ratio of values that are more than
    r*std(x) (so r sigma) away from the mean of x.

    Args:
        r (float) : Weight of sigma.

    Docstring source:
    https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.ratio_beyond_r_sigma
    """
    stack_on_self = False
    return_type = Numeric
    input_types = [Numeric]
    name = "ratio_beyond_r_sigma"

    def __init__(self, r):
        self.r = r

    def get_function(self):
        """Return a callable that applies ``ratio_beyond_r_sigma`` with ``self.r``."""
        def function(x):
            # self.r is read at call time, not when get_function() runs.
            sigma_weight = self.r
            return ratio_beyond_r_sigma(x, r=sigma_weight)

        return function
time_index=self.dt_col)
def is_awake(column):
    """Flag each timestamp whose hour is 0 or lies in 6..23 (inclusive).

    Args:
        column: datetime-like pandas Series (accessed through ``.dt``).

    Returns:
        Integer Series with 1 where the hour qualifies and 0 otherwise.
    """
    hour_of_day = column.dt.hour
    in_waking_range = (hour_of_day >= 6) & (hour_of_day <= 23)
    at_midnight_hour = hour_of_day == 0
    flags = in_waking_range | at_midnight_hour
    return flags.astype(int)
def is_busy_hours(column):
    """Flag each timestamp whose hour lies in 7..9 or 16..19 (inclusive).

    Args:
        column: datetime-like pandas Series (accessed through ``.dt``).

    Returns:
        Integer Series with 1 where the hour qualifies and 0 otherwise.
    """
    hour_of_day = column.dt.hour
    # `&` binds tighter than `|`, so the two windows are evaluated
    # separately and then combined.
    morning_window = (hour_of_day >= 7) & (hour_of_day <= 9)
    evening_window = (hour_of_day >= 16) & (hour_of_day <= 19)
    flags = morning_window | evening_window
    return flags.astype(int)
# Wrap the two hour-based helper functions as transform primitives that take
# the datetime time index as input and are declared to return a numeric value.
IsAwake = make_trans_primitive(function=is_awake,
input_types=[DatetimeTimeIndex],
return_type=Numeric)
IsBusyHours = make_trans_primitive(function=is_busy_hours,
input_types=[DatetimeTimeIndex],
return_type=Numeric)
# Run deep feature synthesis on the "time_seq" entity using the built-in
# calendar transforms plus the two custom primitives defined above.
feature_matrix, feature_defs = ft.dfs(entityset=es,
target_entity="time_seq",
agg_primitives=["count"],
trans_primitives=["month", "weekday", "day", "hour",
"is_weekend", IsAwake, IsBusyHours])
# Hand back both the computed matrix and the feature definitions.
return feature_matrix, feature_defs