import featuretools as ft

def test_all_primitives(entityset, parameters):
    # `primitives` is assumed to be a test fixture mapping primitive names to
    # primitive classes; `parameters` maps primitive names to constructor kwargs.
    is_agg_primitive = lambda name: issubclass(primitives[name], ft.primitives.AggregationPrimitive)
    construct_primitive = lambda name: primitives[name](**parameters.get(name, {}))
    agg_primitives = [construct_primitive(name) for name in primitives if is_agg_primitive(name)]
    feature_matrix, features = ft.dfs(entityset=entityset, target_entity='sessions',
                                      agg_primitives=agg_primitives)
    assert not feature_matrix.empty
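At its simplest, ft.dfs only needs an entityset and a target entity. A minimal sketch, assuming the mock-customer demo data bundled with featuretools (which contains a 'sessions' entity) stands in for the test fixture above:

import featuretools as ft

# Demo entityset shipped with featuretools; it includes a 'sessions' entity.
entityset = ft.demo.load_mock_customer(return_entityset=True)
feature_matrix, features = ft.dfs(entityset=entityset,
                                  target_entity='sessions',
                                  agg_primitives=['count', 'mean'])
print(feature_matrix.head())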
def fit(self, X, **kwargs):
    # X is the cutoff-time frame; features_only=True returns feature
    # definitions without computing the feature matrix.
    self.features = ft.dfs(
        cutoff_time=X,
        features_only=True,
        max_depth=self.max_depth,
        **kwargs
    )
    # Returning self keeps the scikit-learn fit/transform contract.
    return self
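This fit() mirrors the scikit-learn style wrapper that featuretools itself ships as featuretools.wrappers.DFSTransformer. A minimal usage sketch, assuming an entityset es and a cutoff-time frame X (both placeholders here):

from featuretools.wrappers import DFSTransformer

transformer = DFSTransformer(entityset=es,
                             target_entity='sessions',
                             max_depth=2)
transformer.fit(X)         # X: instance ids plus cutoff times
fm = transformer.transform(X)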
from featuretools.primitives import make_trans_primitive
from featuretools.variable_types import DatetimeTimeIndex, Numeric

def is_awake(column):
    hour = column.dt.hour
    return (((hour >= 6) & (hour <= 23)) | (hour == 0)).astype(int)

def is_busy_hours(column):
    hour = column.dt.hour
    return (((hour >= 7) & (hour <= 9)) | ((hour >= 16) & (hour <= 19))).astype(int)

IsAwake = make_trans_primitive(function=is_awake,
                               input_types=[DatetimeTimeIndex],
                               return_type=Numeric)

IsBusyHours = make_trans_primitive(function=is_busy_hours,
                                   input_types=[DatetimeTimeIndex],
                                   return_type=Numeric)
feature_matrix, feature_defs = ft.dfs(entityset=es,
                                      target_entity="time_seq",
                                      agg_primitives=["count"],
                                      trans_primitives=["month", "weekday", "day", "hour",
                                                        "is_weekend", IsAwake, IsBusyHours])
return feature_matrix, feature_defs
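The wrapped functions are plain pandas operations, so they can be sanity-checked outside featuretools (the timestamps below are illustrative):

import pandas as pd

ts = pd.Series(pd.to_datetime(['2020-01-01 03:00',
                               '2020-01-01 08:00',
                               '2020-01-01 17:30']))
print(is_awake(ts).tolist())       # [0, 1, 1]
print(is_busy_hours(ts).tolist())  # [0, 1, 1]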
    instance_id_column = index
else:
    instance_ids = cutoffs.iloc[:, 0]
    instance_id_column = cutoffs.columns[0]

time_column = 'time'
if time_column not in cutoffs:
    not_instance_id = [c for c in cutoffs.columns
                       if c != instance_id_column]
    time_column = not_instance_id[0]
times = cutoffs[time_column]

temporal_cutoffs = make_temporal_cutoffs(instance_ids,
                                         times,
                                         window_size,
                                         num_windows,
                                         start)
result = ft.dfs(entityset=entityset,
                features_only=features_only,
                cutoff_time=temporal_cutoffs,
                target_entity=target_entity,
                cutoff_time_in_index=True,
                **kwargs)
if not features_only:
    fm, fl = result
    return fm.sort_index(level=[entityset[target_entity].index,
                                'time']), fl
return result
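Underneath, make_temporal_cutoffs (importable from featuretools.utils) expands each instance's single cutoff into a window of cutoffs; exactly two of window_size, num_windows and start should be supplied. A sketch with illustrative values:

import pandas as pd
from featuretools.utils import make_temporal_cutoffs

temporal_cutoffs = make_temporal_cutoffs(
    instance_ids=pd.Series([1, 2]),
    cutoffs=pd.Series(pd.to_datetime(['2020-01-05', '2020-01-05'])),
    window_size='1d',
    num_windows=3)
# One (instance_id, time) row per window: each id gets cutoffs at
# 2020-01-03, 2020-01-04 and 2020-01-05.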
from datetime import timedelta
import pandas as pd

def compute_features(self, df, cutoff_strategy, feature_window):
    assert cutoff_strategy.entity_col == self.entity_col
    cutoffs = cutoff_strategy.generate_cutoffs(df)
    cutoffs_ft = []
    for _id, row in cutoffs.iterrows():
        # use the day before each cutoff as the feature cutoff time
        cutoffs_ft.append((row[self.entity_col],
                           row['cutoff_st'] - timedelta(days=1)))
    cutoffs_ft = pd.DataFrame(cutoffs_ft, columns=['instance_id', 'time'])

    feature_matrix, features = ft.dfs(target_entity=self.entity_col,
                                      cutoff_time=cutoffs_ft,
                                      training_window="%dday" % feature_window,
                                      entityset=self.es,
                                      cutoff_time_in_index=True,
                                      verbose=True)
    # one-hot encode categorical feature values
    fm_encoded, features_encoded = ft.encode_features(feature_matrix,
                                                      features)
    self.features = fm_encoded.fillna(0)
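encode_features one-hot encodes the categorical columns of a feature matrix, keeping the most frequent values per feature plus an "unknown" column. Assuming featuretools' documented defaults, the call above is equivalent to:

fm_encoded, features_encoded = ft.encode_features(feature_matrix, features,
                                                  top_n=10,
                                                  include_unknown=True)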
es.entity_from_dataframe(entity_id=entity_id,
                         dataframe=df,
                         index=EntitySpark.recover_col_name(entity_id, entity.index),
                         variable_types=entity.variable_types,
                         time_index=EntitySpark.recover_col_name(entity_id, entity.time_index),
                         secondary_time_index=EntitySpark.recover_col_name(entity_id,
                                                                           entity.secondary_time_index))

for relationship in relationships:
    parent_entity = relationship.parent_variable.entity_id
    parent_col = EntitySpark.recover_col_name(parent_entity, relationship.parent_variable.column_name)
    child_entity = relationship.child_variable.entity_id
    child_col = EntitySpark.recover_col_name(child_entity, relationship.child_variable.column_name)
    es.add_relationship(ft.Relationship(es[parent_entity][parent_col],
                                        es[child_entity][child_col]))
feature_matrix, feature_dfs = ft.dfs(entityset=es,
                                     agg_primitives=agg_primitives,
                                     trans_primitives=trans_primitives,
                                     target_entity=target_entity,
                                     cutoff_time=cutoff_time,
                                     cutoff_time_in_index=False,
                                     n_jobs=n_jobs,
                                     max_depth=max_depth,
                                     training_window=training_window,
                                     approximate=approximate,
                                     chunk_size=chunk_size)
feature_matrix.reset_index(inplace=True)
columns = sorted(feature_matrix.columns)
res = []
for i in range(feature_matrix.shape[0]):