How to use the featuretools.utils.Trie class in featuretools

To help you get started, we’ve selected a few featuretools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github FeatureLabs / featuretools / featuretools / computational_backends / feature_set_calculator.py View on Github external
Returns:
            pd.DataFrame : Pandas DataFrame of calculated feature values.
                Indexed by instance_ids. Columns in same order as features
                passed in.
        """
        assert len(instance_ids) > 0, "0 instance ids provided"

        if progress_callback is None:
            # do nothing for the progress call back if not provided
            def progress_callback(*args):
                pass
        feature_trie = self.feature_set.feature_trie

        df_trie = Trie(path_constructor=RelationshipPath)
        full_entity_df_trie = Trie(path_constructor=RelationshipPath)

        target_entity = self.entityset[self.feature_set.target_eid]
        self._calculate_features_for_entity(entity_id=self.feature_set.target_eid,
                                            feature_trie=feature_trie,
                                            df_trie=df_trie,
                                            full_entity_df_trie=full_entity_df_trie,
                                            precalculated_trie=self.precalculated_features,
                                            filter_variable=target_entity.index,
                                            filter_values=instance_ids,
                                            progress_callback=progress_callback)

        # The dataframe for the target entity should be stored at the root of
        # df_trie.
        df = df_trie.value

        if df.empty:
github FeatureLabs / featuretools / featuretools / computational_backends / utils.py View on Github external
def gather_approximate_features(feature_set):
    """
    Find features which can be approximated. Returned as a trie where the values
    are sets of feature names.

    Only target features that are DirectFeatures are considered. For each one,
    the chain of nested DirectFeatures is followed, concatenating their
    relationship paths, and if the chain ends in an AggregationFeature that
    feature's unique name is recorded at the accumulated path.

    Args:
        feature_set (FeatureSet): Features to search the dependencies of for
            features to approximate.

    Returns:
        Trie[RelationshipPath, set[str]]
    """
    # Nodes are created with ``set`` as the default factory, so the value
    # fetched from a node below can be added to directly.
    approximate_feature_trie = Trie(default=set, path_constructor=RelationshipPath)

    for feature in feature_set.target_features:
        # Target features for which uses_full_entity(...) is truthy are
        # skipped entirely.
        if feature_set.uses_full_entity(feature, check_dependents=True):
            continue

        if isinstance(feature, DirectFeature):
            path = feature.relationship_path
            base_feature = feature.base_features[0]

            # Walk through directly nested DirectFeatures, extending the path
            # with each one's relationship path, until a non-direct base
            # feature is reached.
            while isinstance(base_feature, DirectFeature):
                path = path + base_feature.relationship_path
                base_feature = base_feature.base_features[0]

            # Only an AggregationFeature at the end of the chain is recorded,
            # by unique name, under the accumulated path.
            if isinstance(base_feature, AggregationFeature):
                node_feature_set = approximate_feature_trie.get_node(path).value
                node_feature_set.add(base_feature.unique_name())
    # NOTE(review): this excerpt ends here without the return of
    # approximate_feature_trie that the docstring describes -- presumably the
    # listing is truncated; confirm against the full source file.
github FeatureLabs / featuretools / featuretools / computational_backends / feature_set_calculator.py View on Github external
precalculated_features (Trie[RelationshipPath -> pd.DataFrame]):
                Maps RelationshipPaths to dataframes of precalculated_features

        """
        self.entityset = entityset
        self.feature_set = feature_set
        self.training_window = training_window

        if time_last is None:
            time_last = datetime.now()

        self.time_last = time_last

        if precalculated_features is None:
            precalculated_features = Trie(path_constructor=RelationshipPath)

        self.precalculated_features = precalculated_features

        # total number of features (including dependencies) to be calculated
        self.num_features = sum(len(features1) + len(features2) for _, (_, features1, features2) in self.feature_set.feature_trie)
github FeatureLabs / featuretools / featuretools / computational_backends / feature_set.py View on Github external
def _build_feature_trie(self):
        """
        Create and populate the feature trie for this feature set.

        Each target feature is handed to ``_add_feature_to_trie`` along with
        ``self.approximate_feature_trie``, which adds the feature and its
        dependencies recursively.

        Returns:
            Trie: trie built with ``RelationshipPath`` as its path constructor,
                whose nodes default to the tuple ``(False, set(), set())``.
        """
        def new_node_value():
            # A fresh tuple (and fresh sets) for every newly created node.
            return (False, set(), set())

        trie = Trie(default=new_node_value, path_constructor=RelationshipPath)

        for target_feature in self.target_features:
            self._add_feature_to_trie(
                trie, target_feature, self.approximate_feature_trie)

        return trie
github FeatureLabs / featuretools / featuretools / computational_backends / calculate_feature_matrix.py View on Github external
window (Timedelta or str): frequency by which to group instances with similar
            cutoff times, for features with costly calculations. For example,
            if window is 24 hours, all instances with cutoff times on the same
            day will use the same calculation for expensive features.

        entityset (:class:`.EntitySet`): An already initialized entityset.

        feature_set (:class:`.FeatureSet`): The features to be calculated.

        training_window (`Timedelta`, optional):
            Window defining how much older than the cutoff time data
            can be to be included when calculating the feature. If None, all older data is used.

        save_progress (str, optional): path to save intermediate computational results
    '''
    approx_fms_trie = Trie(path_constructor=RelationshipPath)

    target_time_colname = 'target_time'
    cutoff_time[target_time_colname] = cutoff_time['time']
    approx_cutoffs = bin_cutoff_times(cutoff_time.copy(), window)
    cutoff_df_time_var = 'time'
    cutoff_df_instance_var = 'instance_id'
    # should this order be by dependencies so that calculate_feature_matrix
    # doesn't skip approximating something?
    for relationship_path, approx_feature_names in feature_set.approximate_feature_trie:
        if not approx_feature_names:
            continue

        cutoffs_with_approx_e_ids, new_approx_entity_index_var = \
            _add_approx_entity_index_var(entityset, feature_set.target_eid,
                                         approx_cutoffs.copy(), relationship_path)
github FeatureLabs / featuretools / featuretools / computational_backends / feature_set_calculator.py View on Github external
progress_callback (callable): function to be called with incremental progress updates

        Returns:
            pd.DataFrame : Pandas DataFrame of calculated feature values.
                Indexed by instance_ids. Columns in same order as features
                passed in.
        """
        assert len(instance_ids) > 0, "0 instance ids provided"

        if progress_callback is None:
            # do nothing for the progress call back if not provided
            def progress_callback(*args):
                pass
        feature_trie = self.feature_set.feature_trie

        df_trie = Trie(path_constructor=RelationshipPath)
        full_entity_df_trie = Trie(path_constructor=RelationshipPath)

        target_entity = self.entityset[self.feature_set.target_eid]
        self._calculate_features_for_entity(entity_id=self.feature_set.target_eid,
                                            feature_trie=feature_trie,
                                            df_trie=df_trie,
                                            full_entity_df_trie=full_entity_df_trie,
                                            precalculated_trie=self.precalculated_features,
                                            filter_variable=target_entity.index,
                                            filter_values=instance_ids,
                                            progress_callback=progress_callback)

        # The dataframe for the target entity should be stored at the root of
        # df_trie.
        df = df_trie.value
github FeatureLabs / featuretools / featuretools / computational_backends / feature_set.py View on Github external
def __init__(self, features, approximate_feature_trie=None):
        """
        Args:
            features (list[Feature]): Features of the target entity.
            approximate_feature_trie (Trie[RelationshipPath, set[str]], optional): Dependency
                features to ignore because they have already been approximated. For example, if
                one of the target features is a direct feature of a feature A and A is included in
                approximate_feature_trie then neither A nor its dependencies will appear in
                FeatureSet.feature_trie.
        """
        self.target_eid = features[0].entity.id
        self.target_features = features
        self.target_feature_names = {f.unique_name() for f in features}

        if not approximate_feature_trie:
            approximate_feature_trie = Trie(default=list,
                                            path_constructor=RelationshipPath)
        self.approximate_feature_trie = approximate_feature_trie

        # Maps the unique name of each feature to the actual feature. This is necessary
        # because features do not support equality and so cannot be used as
        # dictionary keys. The equality operator on features produces a new
        # feature (which will always be truthy).
        self.features_by_name = {f.unique_name(): f for f in features}

        feature_dependents = defaultdict(set)
        for f in features:
            deps = f.get_dependencies(deep=True)
            for dep in deps:
                feature_dependents[dep.unique_name()].add(f.unique_name())
                self.features_by_name[dep.unique_name()] = dep
                subdeps = dep.get_dependencies(deep=True)