Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
... datetime(2019, 11, 30, 19, 45, 15)]
>>> month = Month()
>>> month(dates).tolist()
[3, 6, 11]
"""
name = "month"
input_types = [Datetime]
return_type = Ordinal
def get_function(self):
    """Return the callable that extracts the month number of each datetime.

    Returns:
        function: Takes a list-like of datetimes and returns an array
            holding the month of each entry.
    """
    def month(vals):
        index = pd.DatetimeIndex(vals)
        return index.month.values

    return month
class Year(TransformPrimitive):
"""Determines the year value of a datetime.
Examples:
>>> from datetime import datetime
>>> dates = [datetime(2019, 3, 1),
... datetime(2048, 6, 17, 11, 10, 50),
... datetime(1950, 11, 30, 19, 45, 15)]
>>> year = Year()
>>> year(dates).tolist()
[2019, 2048, 1950]
"""
name = "year"
input_types = [Datetime]
return_type = Ordinal
def get_function(self):
"""
name = 'time_since'
input_types = [[DatetimeTimeIndex], [Datetime]]
return_type = Numeric
uses_calc_time = True
def __init__(self, unit="seconds"):
    """Record the time unit used when reporting the elapsed time.

    Args:
        unit (str): Name of the time unit. It is lower-cased before being
            stored on the instance as ``self.unit``. Defaults to "seconds".
    """
    normalized_unit = unit.lower()
    self.unit = normalized_unit
def get_function(self):
    """Return the callable that measures the time from each value to ``time``.

    Returns:
        function: Takes ``array`` (datetimes) and ``time`` (the reference
            datetime). It subtracts each datetime from ``time``, takes the
            difference in seconds, and passes that together with
            ``self.unit`` to ``convert_time_units``.
    """
    def pd_time_since(array, time):
        elapsed = time - pd.DatetimeIndex(array)
        return convert_time_units(elapsed.total_seconds(), self.unit)

    return pd_time_since
class IsIn(TransformPrimitive):
"""Determines whether a value is present in a provided list.
Examples:
>>> items = ['string', 10.3, False]
>>> is_in = IsIn(list_of_outputs=items)
>>> is_in(['string', 10.5, False]).tolist()
[True, False, True]
"""
name = "isin"
input_types = [Variable]
return_type = Boolean
def __init__(self, list_of_outputs=None):
self.list_of_outputs = list_of_outputs
def get_function(self):
... 'Two words',
... 'no-spaces',
... 'Also works with sentences. Second sentence!']).tolist()
[4, 2, 1, 6]
"""
name = 'num_words'
input_types = [Text]
return_type = Numeric
def get_function(self):
    """Return the callable that counts the words in each string.

    Words are whitespace-delimited tokens, so repeated, leading or trailing
    whitespace does not inflate the count. Empty and missing strings
    contain zero words.

    Returns:
        function: Takes a list-like of strings and returns a pandas Series
            with the number of words in each entry.
    """
    def word_counter(array):
        # Missing entries are treated as empty text so every row gets a count.
        text = pd.Series(array).fillna('')
        # split() without a separator collapses runs of whitespace and
        # yields an empty list for an empty string; counting spaces + 1
        # would report 3 words for 'a  b' and 1 word for ''.
        return text.str.split().str.len()

    return word_counter
class TimeSince(TransformPrimitive):
"""Calculates time from a value to a specified cutoff datetime.
Args:
unit (str): Defines the unit of time to count from.
Defaults to Seconds. Acceptable values:
years, months, days, hours, minutes, seconds, milliseconds, nanoseconds
Examples:
>>> from datetime import datetime
>>> time_since = TimeSince()
>>> times = [datetime(2019, 3, 1, 0, 0, 0, 1),
... datetime(2019, 3, 1, 0, 0, 1, 0),
... datetime(2019, 3, 1, 0, 2, 0, 0)]
>>> cutoff_time = datetime(2019, 3, 1, 0, 0, 0, 0)
>>> values = time_since(array=times, time=cutoff_time)
>>> list(map(int, values))
input_types = [Numeric]
return_type = Numeric
def __init__(self, value=1):
    """Store the scalar operand on the instance.

    Args:
        value: Scalar kept as ``self.value``. Defaults to 1.
    """
    self.value = value
def get_function(self):
    """Return the callable that takes each value modulo ``self.value``.

    Returns:
        function: Takes ``vals`` and returns ``vals % self.value``.
    """
    def modulo_scalar(vals):
        remainder = vals % self.value
        return remainder

    return modulo_scalar
def generate_name(self, base_feature_names):
    """Build the feature name in the form ``<base feature> % <value>``.

    Args:
        base_feature_names (list[str]): Names of the input features; only
            the first entry is used.

    Returns:
        str: The generated feature name.
    """
    feature = base_feature_names[0]
    scalar = str(self.value)
    return "%s %% %s" % (feature, scalar)
class ModuloByFeature(TransformPrimitive):
"""Return the modulo of a scalar by each element in the list.
Description:
Given a list of numeric values and a scalar, return the
modulo, or remainder of the scalar after being divided
by each value.
Examples:
>>> modulo_by_feature = ModuloByFeature(value=2)
>>> modulo_by_feature([4, 1, 2]).tolist()
[2, 0, 0]
"""
name = "modulo_by_feature"
input_types = [Numeric]
return_type = Numeric
input_types = [Numeric]
return_type = Numeric
def __init__(self, value=1):
    """Store the scalar operand on the instance.

    Args:
        value: Scalar kept as ``self.value``. Defaults to 1.
    """
    self.value = value
def get_function(self):
    """Return the callable that divides ``self.value`` by each value.

    Returns:
        function: Takes ``vals`` and returns ``self.value / vals``.
    """
    def divide_by_feature(vals):
        quotient = self.value / vals
        return quotient

    return divide_by_feature
def generate_name(self, base_feature_names):
    """Build the feature name in the form ``<value> / <base feature>``.

    Args:
        base_feature_names (list[str]): Names of the input features; only
            the first entry is used.

    Returns:
        str: The generated feature name.
    """
    scalar = str(self.value)
    feature = base_feature_names[0]
    return "%s / %s" % (scalar, feature)
class ModuloNumeric(TransformPrimitive):
"""Element-wise modulo of two lists.
Description:
Given a list of values X and a list of values Y,
determine the modulo, or remainder of each value in
X after it's divided by its corresponding value in Y.
Examples:
>>> modulo_numeric = ModuloNumeric()
>>> modulo_numeric([2, 1, 5], [1, 2, 2]).tolist()
[0, 1, 1]
"""
name = "modulo_numeric"
input_types = [Numeric, Numeric]
return_type = Numeric
return_type = Boolean
def __init__(self, value=0):
    """Store the scalar to compare against on the instance.

    Args:
        value: Scalar kept as ``self.value``. Defaults to 0.
    """
    self.value = value
def get_function(self):
    """Return the callable that tests whether each value exceeds ``self.value``.

    Returns:
        function: Takes ``vals`` and returns a boolean pandas Series that
            is True where the value is greater than ``self.value``.
    """
    def greater_than_scalar(vals):
        # Wrapping the input in a Series lets one comparison handle both
        # the numeric and the datetime case.
        series = pd.Series(vals)
        return series > self.value

    return greater_than_scalar
def generate_name(self, base_feature_names):
    """Build the feature name in the form ``<base feature> > <value>``.

    Args:
        base_feature_names (list[str]): Names of the input features; only
            the first entry is used.

    Returns:
        str: The generated feature name.
    """
    feature = base_feature_names[0]
    threshold = str(self.value)
    return "%s > %s" % (feature, threshold)
class GreaterThanEqualTo(TransformPrimitive):
"""Determines if values in one list are greater than or equal to another list.
Description:
Given a list of values X and a list of values Y, determine
whether each value in X is greater than or equal to each
corresponding value in Y. Equal pairs will return `True`.
Examples:
>>> greater_than_equal_to = GreaterThanEqualTo()
>>> greater_than_equal_to([2, 1, 2], [1, 2, 2]).tolist()
[True, False, True]
"""
name = "greater_than_equal_to"
input_types = [[Numeric, Numeric], [Datetime, Datetime], [Ordinal, Ordinal]]
return_type = Boolean
Nan values are ignored when determining rank
>>> percentile([10, 15, 1, None, 20]).tolist()
[0.5, 0.75, 0.25, nan, 1.0]
"""
name = 'percentile'
uses_full_entity = True
input_types = [Numeric]
return_type = Numeric
def get_function(self):
    """Return the callable that computes the percentile rank of each value.

    Returns:
        function: Takes a list-like of values and returns a pandas Series
            of ranks expressed as fractions (``rank(pct=True)``).
    """
    def percentile(array):
        series = pd.Series(array)
        return series.rank(pct=True)

    return percentile
class Latitude(TransformPrimitive):
    """Extracts the first element of every tuple in a list of LatLong tuples.

    For use with the LatLong variable type.

    Examples:
        >>> latitude = Latitude()
        >>> latitude([(42.4, -71.1),
        ...           (40.0, -122.4),
        ...           (41.2, -96.75)]).tolist()
        [42.4, 40.0, 41.2]
    """
    name = 'latitude'
    input_types = [LatLong]
    return_type = Numeric

    def get_function(self):
        """Return the callable that collects the first value of each tuple.

        Returns:
            function: Takes an iterable of indexable pairs and returns a
                pandas Series holding element 0 of each pair.
        """
        def latitude(array):
            first_values = [pair[0] for pair in array]
            return pd.Series(first_values)

        return latitude
... datetime(2019, 11, 30, 19, 45, 15)]
>>> week = Week()
>>> week(dates).tolist()
[1, 25, 48]
"""
name = "week"
input_types = [Datetime]
return_type = Ordinal
def get_function(self):
    """Return the callable that extracts the ISO week number of each datetime.

    ``DatetimeIndex.week`` was deprecated in pandas 1.1 and removed in
    pandas 2.0, so the week is read from ``isocalendar()`` when that method
    exists. The old attribute is used only as a fallback on older pandas.

    Returns:
        function: Takes a list-like of datetimes and returns a numpy array
            of week numbers. The array is int64 when every entry is a valid
            datetime and float64 (with NaN) when any entry is missing.
    """
    def week(vals):
        index = pd.DatetimeIndex(vals)
        if not hasattr(index, "isocalendar"):
            # pandas < 1.1 has no isocalendar(); .week is still available there.
            return index.week.values
        iso_week = index.isocalendar().week
        if iso_week.isna().any():
            # Missing datetimes cannot be stored in an integer array.
            return iso_week.to_numpy(dtype="float64", na_value=float("nan"))
        return iso_week.to_numpy(dtype="int64")

    return week
class Month(TransformPrimitive):
"""Determines the month value of a datetime.
Examples:
>>> from datetime import datetime
>>> dates = [datetime(2019, 3, 1),
... datetime(2019, 6, 17, 11, 10, 50),
... datetime(2019, 11, 30, 19, 45, 15)]
>>> month = Month()
>>> month(dates).tolist()
[3, 6, 11]
"""
name = "month"
input_types = [Datetime]
return_type = Ordinal
def get_function(self):
return_type = Boolean
def __init__(self, value=0):
    """Store the scalar to compare against on the instance.

    Args:
        value: Scalar kept as ``self.value``. Defaults to 0.
    """
    self.value = value
def get_function(self):
    """Return the callable that tests whether each value is at most ``self.value``.

    Returns:
        function: Takes ``vals`` and returns a boolean pandas Series that
            is True where the value is less than or equal to ``self.value``.
    """
    def less_than_equal_to_scalar(vals):
        # Wrapping the input in a Series lets one comparison handle both
        # the numeric and the datetime case.
        series = pd.Series(vals)
        return series <= self.value

    return less_than_equal_to_scalar
def generate_name(self, base_feature_names):
    """Build the feature name in the form ``<base feature> <= <value>``.

    Args:
        base_feature_names (list[str]): Names of the input features; only
            the first entry is used.

    Returns:
        str: The generated feature name.
    """
    feature = base_feature_names[0]
    threshold = str(self.value)
    return "%s <= %s" % (feature, threshold)
class Equal(TransformPrimitive):
"""Determines if values in one list are equal to another list.
Description:
Given a list of values X and a list of values Y, determine
whether each value in X is equal to each corresponding value
in Y.
Examples:
>>> equal = Equal()
>>> equal([2, 1, 2], [1, 2, 2]).tolist()
[False, False, True]
"""
name = "equal"
input_types = [Variable, Variable]
return_type = Boolean
commutative = True
input_types = [Numeric]
return_type = Numeric
def __init__(self, value=1):
    """Store the scalar operand on the instance.

    Args:
        value: Scalar kept as ``self.value``. Defaults to 1.
    """
    self.value = value
def get_function(self):
    """Return the callable that multiplies each value by ``self.value``.

    Returns:
        function: Takes ``vals`` and returns ``vals * self.value``.
    """
    def multiply_scalar(vals):
        product = vals * self.value
        return product

    return multiply_scalar
def generate_name(self, base_feature_names):
    """Build the feature name in the form ``<base feature> * <value>``.

    Args:
        base_feature_names (list[str]): Names of the input features; only
            the first entry is used.

    Returns:
        str: The generated feature name.
    """
    feature = base_feature_names[0]
    factor = str(self.value)
    return "%s * %s" % (feature, factor)
class MultiplyBoolean(TransformPrimitive):
"""Element-wise multiplication of two lists of boolean values.
Description:
Given a list of boolean values X and a list of boolean
values Y, determine the product of each value in X
with its corresponding value in Y.
Examples:
>>> multiply_boolean = MultiplyBoolean()
>>> multiply_boolean([True, True, False], [True, False, True]).tolist()
[True, False, False]
"""
name = "multiply_boolean"
input_types = [[Boolean, Boolean]]
return_type = Boolean