How to use the featuretools.variable_types.Boolean function in featuretools

To help you get started, we’ve selected a few featuretools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github FeatureLabs / featuretools / featuretools / demo / flight.py View on Github external
def make_es(data):
    """Assemble the 'Flight Data' EntitySet from a trip-log dataframe.

    ``data`` is registered as the ``trip_logs`` entity (a new
    ``trip_log_id`` index is created via ``make_index=True``), a
    ``flights`` entity is then split off on ``flight_id``, and an
    ``airlines`` entity is split off ``flights`` on ``carrier``.

    Args:
        data: dataframe of trip logs; it is passed straight to
            ``entity_from_dataframe`` and is referenced by the column
            names listed below.
    """
    es = ft.EntitySet('Flight Data')

    # Columns grouped under the 'arr_time' secondary time index.
    arrival_columns = [
        'arr_delay', 'dep_delay', 'carrier_delay', 'weather_delay',
        'national_airspace_delay', 'security_delay',
        'late_aircraft_delay', 'canceled', 'diverted',
        'taxi_in', 'taxi_out', 'air_time', 'dep_time',
    ]

    # Columns whose variable type is set explicitly.
    explicit_types = {
        'flight_num': vtypes.Categorical,
        'distance_group': vtypes.Ordinal,
        'canceled': vtypes.Boolean,
        'diverted': vtypes.Boolean,
    }

    es.entity_from_dataframe(
        entity_id='trip_logs',
        dataframe=data,
        index='trip_log_id',
        make_index=True,
        time_index='date_scheduled',
        secondary_time_index={'arr_time': arrival_columns},
        variable_types=explicit_types,
    )

    # Per-flight attributes move from trip_logs to the new flights entity.
    flight_attributes = [
        'origin', 'origin_city', 'origin_state',
        'dest', 'dest_city', 'dest_state',
        'distance_group', 'carrier', 'flight_num',
    ]
    es.normalize_entity(
        base_entity_id='trip_logs',
        new_entity_id='flights',
        index='flight_id',
        additional_variables=flight_attributes,
    )

    es.normalize_entity(
        base_entity_id='flights',
        new_entity_id='airlines',
        index='carrier',
        make_time_index=False,
    )
github FeatureLabs / featuretools / featuretools / feature_base / feature_base.py View on Github external
def __mul__(self, other):
        """Build the feature for ``self * other``.

        When ``other`` is a FeatureBase and both operands have the
        Boolean variable type, a MultiplyBoolean feature is returned.
        Every other case is delegated to ``_handle_binary_comparision``
        with the numeric and numeric-scalar multiply primitives.
        """
        # Same short-circuit order as a nested check: type of `other`
        # first, then each operand's variable type.
        both_boolean = (isinstance(other, FeatureBase) and
                        self.variable_type == Boolean and
                        other.variable_type == Boolean)
        if both_boolean:
            return Feature([self, other], primitive=primitives.MultiplyBoolean)

        return self._handle_binary_comparision(
            other,
            primitives.MultiplyNumeric,
            primitives.MultiplyNumericScalar)
github FeatureLabs / featuretools / featuretools / utils / entity_utils.py View on Github external
else:
                inferred_type = vtypes.Categorical

                # heuristics to predict whether this is something other than categorical
                sample = df[variable].sample(min(10000, len(df[variable])))

                # catch cases where object dtype cannot be interpreted as a string
                try:
                    avg_length = sample.str.len().mean()
                    if avg_length > 50:
                        inferred_type = vtypes.Text
                except AttributeError:
                    pass

        elif df[variable].dtype == "bool":
            inferred_type = vtypes.Boolean

        elif pdtypes.is_categorical_dtype(df[variable].dtype):
            inferred_type = vtypes.Categorical

        elif pdtypes.is_numeric_dtype(df[variable].dtype):
            inferred_type = vtypes.Numeric

        elif col_is_datetime(df[variable]):
            inferred_type = vtypes.Datetime

        elif len(df[variable]):
            sample = df[variable] \
                .sample(min(10000, df[variable].nunique(dropna=False)))

            unique = sample.unique()
            percent_unique = sample.size / len(unique)
github FeatureLabs / featuretools / featuretools / synthesis / deep_feature_synthesis.py View on Github external
"""
        all_features = {}
        for e in self.es.entities:
            if e not in self.ignore_entities:
                all_features[e.id] = {}

        self.where_clauses = defaultdict(set)
        self._run_dfs(self.es[self.target_entity_id], [],
                      all_features, max_depth=self.max_depth)

        new_features = list(all_features[self.target_entity_id].values())

        if variable_types is None:
            variable_types = [Numeric,
                              Discrete,
                              Boolean]
        elif variable_types == 'all':
            variable_types = None
        else:
            msg = "variable_types must be a list, or 'all'"
            assert isinstance(variable_types, list), msg

        if variable_types is not None:
            new_features = [f for f in new_features
                            if any(issubclass(f.variable_type, vt) for vt in variable_types)]

        def filt(f):
            # remove identity features of the ID field of the target entity
            if (isinstance(f, IdentityFeature) and
                    f.entity.id == self.target_entity_id and
                    f.variable.id == self.es[self.target_entity_id].index):
                return False
github FeatureLabs / featuretools / featuretools / primitives / standard / binary_transform.py View on Github external
class And(TransformPrimitive):
    """Compute the element-wise logical AND of two boolean lists.

    Description:
        Given a list of booleans X and a list of booleans Y,
        each output value is `True` only when the value in X
        and its corresponding value in Y are both `True`.

    Examples:
        >>> _and = And()
        >>> _and([False, True, False], [True, True, False]).tolist()
        [False, True, False]
    """
    name = "and"
    input_types = [Boolean, Boolean]
    return_type = Boolean
    commutative = True

    def get_function(self):
        """Return the callable that performs the AND: ``np.logical_and``."""
        return np.logical_and

    def generate_name(self, base_feature_names):
        """Return the display name ``AND(<first>, <second>)``."""
        first = base_feature_names[0]
        second = base_feature_names[1]
        return "AND(%s, %s)" % (first, second)


class Or(TransformPrimitive):
    """Element-wise logical OR of two lists.

    Description:
        Given a list of booleans X and a list of booleans Y,
        determine whether each value in X is `True`, or
github FeatureLabs / featuretools / featuretools / utils / entity_utils.py View on Github external
if new_type == vtypes.Numeric:
        orig_nonnull = df[column_id].dropna().shape[0]
        df[column_id] = pd.to_numeric(df[column_id], errors='coerce')
        # This will convert strings to nans
        # If column contained all strings, then we should
        # just raise an error, because that shouldn't have
        # been converted to numeric
        nonnull = df[column_id].dropna().shape[0]
        if nonnull == 0 and orig_nonnull != 0:
            raise TypeError("Attempted to convert all string column {} to numeric".format(column_id))
    elif issubclass(new_type, vtypes.Datetime):
        format = kwargs.get("format", None)
        # TODO: if float convert to int?
        df[column_id] = pd.to_datetime(df[column_id], format=format,
                                       infer_datetime_format=True)
    elif new_type == vtypes.Boolean:
        map_dict = {kwargs.get("true_val", True): True,
                    kwargs.get("false_val", False): False,
                    True: True,
                    False: False}
        # TODO: what happens to nans?
        df[column_id] = df[column_id].map(map_dict).astype(np.bool)
    elif not issubclass(new_type, vtypes.Discrete):
        raise Exception("Cannot convert column %s to %s" %
                        (column_id, new_type))
    return df
github Featuretools / DL-DB / dldb / preprocessor.py View on Github external
def fit_transform(self, ftens, fl=None, categorical_feature_names=None, labels=None):
        if categorical_feature_names is not None:
            self.categorical_feature_names = categorical_feature_names
        elif fl is not None:
            self.categorical_feature_names = [f.get_name() for f in fl
                                              if issubclass(f.variable_type,
                                                            Discrete)
                                              and not
                                              f.variable_type == Boolean]
        else:
            self.categorical_feature_names = [c for c in ftens.columns
                                              if ftens[c].dtype == object]

        # Can't handle multiindex
        if len(ftens.index.names) > 1:
            index_name = ftens.index.names[0]
            ftens = ftens.reset_index(index_name, drop=False).set_index(index_name)
        self.categorical_vocab = self._gen_categorical_mapping(ftens)

        self.numeric_columns = [f for f in ftens.columns
                                if f not in self.categorical_feature_names]

        ftens = self.fit_transform_scaler_imputer(ftens)

        if not self.regression:
github FeatureLabs / featuretools / featuretools / primitives / standard / aggregation_primitives.py View on Github external
class NumTrue(AggregationPrimitive):
    """Counts the number of `True` values.

    Description:
        Given a list of booleans, return the number
        of `True` values. Ignores 'NaN'.

        The count is obtained by summing the input with
        ``np.sum`` (see ``get_function``).

    Examples:
        >>> num_true = NumTrue()
        >>> num_true([True, False, True, True, None])
        3
    """
    name = "num_true"
    # Takes a single Boolean input and produces a Numeric result.
    input_types = [Boolean]
    return_type = Numeric
    # NOTE(review): presumably the value reported when there is nothing
    # to aggregate -- confirm against AggregationPrimitive.
    default_value = 0
    # NOTE(review): both stacking lists are empty; their exact effect is
    # defined by AggregationPrimitive / DFS, not visible here.
    stack_on = []
    stack_on_exclude = []

    def get_function(self):
        # Summing booleans counts the True entries.
        return np.sum


class PercentTrue(AggregationPrimitive):
    """Determines the percent of `True` values.

    Description:
        Given a list of booleans, return the percent
        of values which are `True` as a decimal.
        `NaN` values are treated as `False`,
github FeatureLabs / featuretools / featuretools / primitives / standard / binary_transform.py View on Github external
"""Element-wise multiplication of two lists.

    Description:
        Given a list of values X and a list of values
        Y, determine the product of each value in X
        with its corresponding value in Y.

    Examples:
        >>> multiply_numeric = MultiplyNumeric()
        >>> multiply_numeric([2, 1, 2], [1, 2, 2]).tolist()
        [2, 2, 4]
    """
    name = "multiply_numeric"
    input_types = [
        [Numeric, Numeric],
        [Numeric, Boolean],
        [Boolean, Numeric],
    ]
    return_type = Numeric
    commutative = True

    def get_function(self):
        """Return the callable that computes this primitive: ``np.multiply``."""
        return np.multiply

    def generate_name(self, base_feature_names):
        """Return the display name ``<first> * <second>``.

        Only the first two entries of ``base_feature_names`` are used.
        """
        left = base_feature_names[0]
        right = base_feature_names[1]
        return "%s * %s" % (left, right)


class MultiplyNumericScalar(TransformPrimitive):
    """Multiply each element in the list by a scalar.

    Description:
github HDI-Project / MLBlocks / examples / pipelines / multitable / multitable.py View on Github external
def make_entity_set(orders_table, order_products_table):
    """Assemble the "instacart" EntitySet from the two input tables.

    Registers the ``order_products`` and ``orders`` entities, relates
    them on ``order_id``, derives a ``users`` entity from ``orders``
    keyed on ``user_id``, and adds last-time indexes.

    Args:
        orders_table: dataframe backing the ``orders`` entity; it is
            indexed by ``order_id`` with ``order_time`` as time index.
            Presumably it also carries a ``user_id`` column, since
            ``users`` is normalized out of it -- verify against caller.
        order_products_table: dataframe backing the ``order_products``
            entity; it is indexed by ``order_product_id`` with
            ``order_time`` as time index, and its ``aisle_id`` and
            ``reordered`` columns are typed explicitly.
    """
    es = ft.EntitySet("instacart")

    # aisle_id and reordered get explicit variable types.
    es.entity_from_dataframe(
        entity_id="order_products",
        dataframe=order_products_table,
        index="order_product_id",
        variable_types={
            "aisle_id": ft.variable_types.Categorical,
            "reordered": ft.variable_types.Boolean
        },
        time_index="order_time")

    es.entity_from_dataframe(
        entity_id="orders",
        dataframe=orders_table,
        index="order_id",
        time_index="order_time")

    # Link orders.order_id to order_products.order_id.
    es.add_relationship(
        ft.Relationship(es["orders"]["order_id"],
                        es["order_products"]["order_id"]))

    # Split a users entity out of orders, keyed on user_id.
    es.normalize_entity(
        base_entity_id="orders", new_entity_id="users", index="user_id")
    es.add_last_time_indexes()