Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
>>> num_true = NumTrue()
>>> num_true([True, False, True, True, None])
3
"""
name = "num_true"
input_types = [Boolean]
return_type = Numeric
default_value = 0
stack_on = []
stack_on_exclude = []
def get_function(self):
    """Return the callable used to aggregate the input values.

    Returns:
        callable: ``np.sum``.
    """
    aggregator = np.sum
    return aggregator
class PercentTrue(AggregationPrimitive):
"""Determines the percent of `True` values.
Description:
Given a list of booleans, return the percent
of values which are `True` as a decimal.
`NaN` values are treated as `False`,
adding to the denominator.
Examples:
>>> percent_true = PercentTrue()
>>> percent_true([True, False, True, True, None])
0.6
"""
name = "percent_true"
input_types = [Boolean]
return_type = Numeric
function=time_since_last,
input_types=[DatetimeTimeIndex],
return_type=Numeric,
description="Time since last related instance",
uses_calc_time=True)
'''
if description is None:
default_description = 'A custom primitive'
doc = inspect.getdoc(function)
description = doc if doc is not None else default_description
cls = {"__doc__": description}
if cls_attributes is not None:
cls.update(cls_attributes)
name = name or function.__name__
new_class = type(name, (AggregationPrimitive,), cls)
new_class.name = name
new_class.input_types = input_types
new_class.return_type = return_type
new_class.stack_on = stack_on
new_class.stack_on_exclude = stack_on_exclude
new_class.stack_on_self = stack_on_self
new_class.base_of = base_of
new_class.base_of_exclude = base_of_exclude
new_class.commutative = commutative
new_class.number_output_features = number_output_features
new_class, default_kwargs = inspect_function_args(new_class,
function,
uses_calc_time)
if len(default_kwargs) > 0:
new_class.default_kwargs = default_kwargs
return_type = Numeric
uses_calc_time = True
def __init__(self, unit="seconds"):
    """Store the requested time unit in lower case.

    Args:
        unit (str): Name of the time unit. Defaults to "seconds".
    """
    normalized_unit = unit.lower()
    self.unit = normalized_unit
def get_function(self):
    """Build the aggregation callable.

    Returns:
        callable: ``time_since_last(values, time=None)``, which subtracts the
        final entry of ``values`` from ``time`` and passes the resulting
        number of seconds, together with ``self.unit``, to
        ``convert_time_units``.
    """
    def time_since_last(values, time=None):
        most_recent = values.iloc[-1]
        elapsed_seconds = (time - most_recent).total_seconds()
        return convert_time_units(elapsed_seconds, self.unit)

    return time_since_last
class TimeSinceFirst(AggregationPrimitive):
"""Calculates the time elapsed since the first datetime (in seconds).
Description:
Given a list of datetimes, calculate the
time elapsed since the first datetime (in
seconds). Uses the instance's cutoff time.
Args:
unit (str): Defines the unit of time to count from.
Defaults to seconds. Acceptable values:
years, months, days, hours, minutes, seconds, milliseconds, nanoseconds
Examples:
>>> from datetime import datetime
>>> time_since_first = TimeSinceFirst()
>>> cutoff_time = datetime(2010, 1, 1, 12, 0, 0)
x = x.astype('int64')
# use len(x)-1 because we care about difference
# between values, len(x)-1 = len(diff(x))
avg = (x.max() - x.min()) / (len(x) - 1)
avg = avg * 1e-9
# long form:
# diff_in_ns = x.diff().iloc[1:].astype('int64')
# diff_in_seconds = diff_in_ns * 1e-9
# avg = diff_in_seconds.mean()
return avg
return pd_avg_time_between
class Median(AggregationPrimitive):
"""Determines the middlemost number in a list of values.
Examples:
>>> median = Median()
>>> median([5, 3, 2, 1, 4])
3.0
`NaN` values are ignored.
>>> median([5, 3, 2, 1, 4, None])
3.0
"""
name = "median"
input_types = [Numeric]
return_type = Numeric
Examples:
>>> any = Any()
>>> any([False, False, False, True])
True
"""
name = "any"
input_types = [Boolean]
return_type = Boolean
stack_on_self = False
def get_function(self):
    """Return the callable used to aggregate the input values.

    Returns:
        callable: ``np.any``.
    """
    aggregator = np.any
    return aggregator
class All(AggregationPrimitive):
"""Calculates if all values are 'True' in a list.
Description:
Given a list of booleans, return `True` if all
of the values are `True`.
Examples:
>>> all = All()
>>> all([False, False, False, True])
False
"""
name = "all"
input_types = [Boolean]
return_type = Boolean
stack_on_self = False
>>> last = Last()
>>> last([1, 2, 3, 4, 5, None])
nan
"""
name = "last"
input_types = [Variable]
return_type = None
stack_on_self = False
def get_function(self):
    """Build the aggregation callable.

    Returns:
        callable: ``pd_last(x)``, which returns the final positional
        element of ``x`` via ``x.iloc[-1]``.
    """
    def pd_last(x):
        final_value = x.iloc[-1]
        return final_value

    return pd_last
class Any(AggregationPrimitive):
"""Determines if any value is 'True' in a list.
Description:
Given a list of booleans, return `True` if one or
more of the values are `True`.
Examples:
>>> any = Any()
>>> any([False, False, False, True])
True
"""
name = "any"
input_types = [Boolean]
return_type = Boolean
stack_on_self = False
Examples:
>>> skew = Skew()
>>> skew([1, 10, 30, None])
1.0437603722639681
"""
name = "skew"
input_types = [Numeric]
return_type = Numeric
stack_on = []
stack_on_self = False
def get_function(self):
    """Return the callable used to aggregate the input values.

    Returns:
        callable: ``pd.Series.skew``.
    """
    aggregator = pd.Series.skew
    return aggregator
class Std(AggregationPrimitive):
    """Measures how spread out the values are around their mean, ignoring `NaN`.

    Examples:
        >>> std = Std()
        >>> round(std([1, 2, 3, 4, 5, None]), 3)
        1.414
    """

    name = "std"
    input_types = [Numeric]
    return_type = Numeric
    stack_on_self = False

    def get_function(self):
        """Return the callable used to aggregate the values: ``np.std``."""
        aggregator = np.std
        return aggregator
Examples:
>>> mode = Mode()
>>> mode(['red', 'blue', 'green', 'blue'])
'blue'
"""
name = "mode"
input_types = [Discrete]
return_type = None
def get_function(self):
    """Build the aggregation callable.

    Returns:
        callable: ``pd_mode(s)``, which returns the entry labelled ``0`` of
        ``s.mode()``, or ``np.nan`` when that entry does not exist.
    """
    def pd_mode(s):
        modes = s.mode()
        return modes.get(0, np.nan)

    return pd_mode
class Min(AggregationPrimitive):
    """Finds the lowest value in the input, ignoring `NaN` values.

    Examples:
        >>> min = Min()
        >>> min([1, 2, 3, 4, 5, None])
        1.0
    """

    name = "min"
    input_types = [Numeric]
    return_type = Numeric
    stack_on_self = False

    def get_function(self):
        """Return the callable used to aggregate the values: ``np.min``."""
        aggregator = np.min
        return aggregator
3.0
`NaN` values are ignored.
>>> median([5, 3, 2, 1, 4, None])
3.0
"""
name = "median"
input_types = [Numeric]
return_type = Numeric
def get_function(self):
    """Return the callable used to aggregate the input values.

    Returns:
        callable: ``pd.Series.median``.
    """
    aggregator = pd.Series.median
    return aggregator
class Skew(AggregationPrimitive):
"""Computes the extent to which a distribution differs from a normal distribution.
Description:
For normally distributed data, the skewness should be about 0.
A skewness value > 0 means that there is more weight in the
right tail of the distribution.
Examples:
>>> skew = Skew()
>>> skew([1, 10, 30, None])
1.0437603722639681
"""
name = "skew"
input_types = [Numeric]
return_type = Numeric
stack_on = []
return_type = Numeric
def __init__(self, skipna=True):
    """Record whether missing values are skipped.

    Args:
        skipna (bool): Stored as ``self.skipna`` and read later by
            ``get_function``. Defaults to True.
    """
    self.skipna = skipna
def get_function(self):
    """Pick the averaging callable according to ``self.skipna``.

    Returns:
        callable: ``np.mean`` when ``self.skipna`` is truthy; otherwise a
        wrapper that averages the raw ``.values`` array of the series.
    """
    def mean(series):
        raw_values = series.values
        return np.mean(raw_values)

    # np.mean of series is functionally nanmean
    return np.mean if self.skipna else mean
class Mode(AggregationPrimitive):
"""Determines the most commonly repeated value.
Description:
Given a list of values, return the value with the
highest number of occurrences. If list is
empty, return `NaN`.
Examples:
>>> mode = Mode()
>>> mode(['red', 'blue', 'green', 'blue'])
'blue'
"""
name = "mode"
input_types = [Discrete]
return_type = None