features = self._features_by_type(all_features=all_features,
entity=parent_entity,
variable_type=[Numeric,
Categorical,
Ordinal],
max_depth=max_depth)
for f in features:
if self._feature_in_relationship_path([relationship], f):
continue
# limit direct features built from aggregation features that have where clauses
if isinstance(f, AggregationFeature):
deep_base_features = [f] + f.get_dependencies(deep=True)
for feat in deep_base_features:
if isinstance(feat, AggregationFeature) and feat.where is not None:
continue
new_f = DirectFeature(f, child_entity)
self._handle_new_feature(all_features=all_features,
new_feature=new_f)
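# A minimal, self-contained sketch (toy classes, not featuretools ones) of the
# dependency walk used above: collect a feature plus everything it is built on,
# then flag any dependency that carries a where clause.
class ToyFeature(object):
    def __init__(self, name, base_features=(), where=None):
        self.name = name
        self.base_features = list(base_features)
        self.where = where

    def get_dependencies(self, deep=False):
        deps = list(self.base_features)
        if deep:
            for b in self.base_features:
                deps.extend(b.get_dependencies(deep=True))
        return deps


ident = ToyFeature("amount")
conditional_count = ToyFeature("count_where_device", [ident], where="device = mobile")
direct = ToyFeature("direct_of_count", [conditional_count])

deep_base_features = [direct] + direct.get_dependencies(deep=True)
has_where_agg = any(f.where is not None for f in deep_base_features)
# has_where_agg is True because conditional_count carries a where clause,
# which is exactly what the loop above is looking for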
if len(feat_wheres) >= self.where_stacking_limit:
continue
# only build where-clause aggregations for primitives listed in where_primitives
if not any([issubclass(type(agg_prim), type(primitive))
for primitive in self.where_primitives]):
continue
for where in wheres:
# skip where clauses that share base features with the aggregation's inputs
base_hashes = [f.hash() for f in new_f.base_features]
if any([base_feat.hash() in base_hashes for base_feat in where.base_features]):
continue
new_f = AggregationFeature(matching_input,
parent_entity=parent_entity,
where=where,
primitive=agg_prim)
self._handle_new_feature(new_f, all_features)
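# For context, a minimal sketch of the user-facing call that exercises the
# where-clause logic above, assuming a pre-1.0 featuretools API (entity-based
# EntitySet). add_interesting_values() populates the values that become where
# clauses, and where_primitives selects which primitives may use them.
import featuretools as ft

es = ft.demo.load_mock_customer(return_entityset=True)
es.add_interesting_values()
feature_matrix, feature_defs = ft.dfs(entityset=es,
                                      target_entity="customers",
                                      agg_primitives=["count", "mean"],
                                      where_primitives=["count"],
                                      trans_primitives=[],
                                      max_depth=2)
# feature_defs should now include conditional aggregations such as
# COUNT(sessions WHERE device = <interesting value>)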
cutoff_df_time_var = 'time'
target_time = '_original_time'
if approximate is not None:
# If there are approximated aggs, bin times
binned_cutoff_time = bin_cutoff_times(cutoff_time.copy(), approximate)
# Think about collisions: what if original time is a feature
binned_cutoff_time[target_time] = cutoff_time[cutoff_df_time_var]
cutoff_time_to_pass = binned_cutoff_time
else:
    cutoff_time_to_pass = cutoff_time
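# Illustration only (not the library's bin_cutoff_times implementation): binning
# cutoff times to a fixed frequency is what makes approximation possible, since
# rows that fall into the same bin can share one aggregation calculation.
# A pandas sketch, assuming a 'time' column and a one-hour bin width:
import pandas as pd

cutoffs = pd.DataFrame({"instance_id": [1, 2, 3],
                        "time": pd.to_datetime(["2014-01-01 04:10",
                                                "2014-01-01 04:55",
                                                "2014-01-01 06:20"])})
binned = cutoffs.copy()
binned["_original_time"] = cutoffs["time"]
binned["time"] = cutoffs["time"].dt.floor("1H")
# the first two rows now share the 04:00 bin and can reuse the same approximate aggs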
def _get_use_previous(f):
if isinstance(f, AggregationFeature) and f.use_previous is not None:
return (f.use_previous.unit, f.use_previous.value)
else:
return ("", -1)
features = self._features_by_type(all_features=all_features,
                                  entity=child_entity,
                                  variable_type=set(input_types),
                                  max_depth=new_max_depth)
# remove features in relationship path
relationship_path = self.es.find_backward_path(parent_entity.id,
child_entity.id)
features = [f for f in features if not self._feature_in_relationship_path(
relationship_path, f)]
matching_inputs = match(input_types, features,
commutative=agg_prim.commutative)
wheres = list(self.where_clauses[child_entity.id])
for matching_input in matching_inputs:
if not check_stacking(agg_prim, matching_input):
continue
new_f = AggregationFeature(matching_input,
parent_entity=parent_entity,
primitive=agg_prim)
self._handle_new_feature(new_f, all_features)
# Obey allow where
if not agg_prim.allow_where:
continue
# limit the stacking of where features
# count up the number of where features
# in this feature and its dependencies
feat_wheres = []
for f in matching_input:
if isinstance(f, AggregationFeature) and f.where is not None:
feat_wheres.append(f)
for feat in f.get_dependencies(deep=True):
    if isinstance(feat, AggregationFeature) and feat.where is not None:
        feat_wheres.append(feat)
def _get_where(f):
if isinstance(f, AggregationFeature) and f.where is not None:
return f.where.unique_name()
else:
return ''
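# A minimal sketch (hypothetical stand-in classes, not featuretools ones) of how
# helpers like _get_use_previous and _get_where can key a grouping: aggregation
# features that share the same training window and the same where clause can be
# handled in a single pass.
from collections import defaultdict, namedtuple

Window = namedtuple("Window", ["unit", "value"])


class FakeAgg(object):
    def __init__(self, name, use_previous=None, where_name=None):
        self.name = name
        self.use_previous = use_previous
        self.where_name = where_name


def _group_key(f):
    window = (f.use_previous.unit, f.use_previous.value) if f.use_previous else ("", -1)
    return (window, f.where_name or "")


groups = defaultdict(list)
for f in [FakeAgg("count_7d", Window("d", 7)),
          FakeAgg("sum_7d", Window("d", 7)),
          FakeAgg("count_all")]:
    groups[_group_key(f)].append(f.name)
# the two 7-day features share a key, so they end up in the same group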
def _feature_type_handler(self, f):
    # pick the calculation routine that matches the feature's exact class
if type(f) == TransformFeature:
return self._calculate_transform_features
elif type(f) == GroupByTransformFeature:
return self._calculate_groupby_features
elif type(f) == DirectFeature:
return self._calculate_direct_features
elif type(f) == AggregationFeature:
return self._calculate_agg_features
elif type(f) == IdentityFeature:
return self._calculate_identity_features
else:
raise UnknownFeature(u"{} feature unknown".format(f.__class__))
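# A sketch of the same dispatch written as a class-keyed table (an alternative
# shape, not the library's actual method; the name is hypothetical). Looking up
# type(f) directly keeps the exact-type behaviour of the if/elif chain above,
# i.e. subclasses are not matched implicitly.
def _feature_type_handler_table(self, f):
    handlers = {
        TransformFeature: self._calculate_transform_features,
        GroupByTransformFeature: self._calculate_groupby_features,
        DirectFeature: self._calculate_direct_features,
        AggregationFeature: self._calculate_agg_features,
        IdentityFeature: self._calculate_identity_features,
    }
    handler = handlers.get(type(f))
    if handler is None:
        raise UnknownFeature(u"{} feature unknown".format(f.__class__))
    return handler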
target_entity = features[0].entity
dtype = entityset[target_entity.id].df[target_entity.index].dtype
cutoff_time["instance_id"] = cutoff_time["instance_id"].astype(dtype)
feature_set = FeatureSet(features)
# Get features to approximate
if approximate is not None:
approximate_feature_trie = gather_approximate_features(feature_set)
# Make a new FeatureSet that ignores approximated features
feature_set = FeatureSet(features, approximate_feature_trie=approximate_feature_trie)
# Check if there are any non-approximated aggregation features
no_unapproximated_aggs = True
for feature in features:
if isinstance(feature, AggregationFeature):
# do not need to check if feature is in to_approximate since
# only base features of direct features can be in to_approximate
no_unapproximated_aggs = False
break
if approximate is not None:
all_approx_features = {f for _, feats in feature_set.approximate_feature_trie
for f in feats}
else:
all_approx_features = set()
deps = feature.get_dependencies(deep=True, ignored=all_approx_features)
for dependency in deps:
if isinstance(dependency, AggregationFeature):
no_unapproximated_aggs = False
break
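# For context, a minimal sketch of the user-facing call that reaches this code
# path, assuming a pre-1.0 featuretools API: passing `approximate` lets
# aggregations that feed direct features be reused across nearby cutoff times,
# which is why the code above checks for unapproximated aggregations.
import featuretools as ft
import pandas as pd

es = ft.demo.load_mock_customer(return_entityset=True)
feature_defs = ft.dfs(entityset=es, target_entity="sessions",
                      agg_primitives=["count"], trans_primitives=[],
                      max_depth=2, features_only=True)
cutoff_time = pd.DataFrame({"instance_id": [1, 2, 3],
                            "time": pd.to_datetime(["2014-01-01 05:00"] * 3)})
fm = ft.calculate_feature_matrix(feature_defs, entityset=es,
                                 cutoff_time=cutoff_time,
                                 approximate="1 hour")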