Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
dataframe=order_products_table,
index="order_product_id",
variable_types={
"aisle_id": ft.variable_types.Categorical,
"reordered": ft.variable_types.Boolean
},
time_index="order_time")
# Register the "orders" entity from orders_table, indexed by "order_id" and
# using "order_time" as its time index.
es.entity_from_dataframe(
entity_id="orders",
dataframe=orders_table,
index="order_id",
time_index="order_time")
# Join the two entities on "order_id": the "orders" variable is passed first
# and the "order_products" variable second.
# NOTE(review): ft.Relationship presumably takes (parent, child) in that
# order - confirm against the featuretools version in use.
es.add_relationship(
ft.Relationship(es["orders"]["order_id"],
es["order_products"]["order_id"]))
# Derive a new "users" entity from "orders", indexed by "user_id".
es.normalize_entity(
base_entity_id="orders", new_entity_id="users", index="user_id")
# Called with no arguments, so the library defaults apply.
es.add_last_time_indexes()
# Hard-coded list of department names stored as the variable's
# interesting_values.
# NOTE(review): presumably consumed by featuretools when building conditional
# ("where") features - confirm against the library docs.
es["order_products"]["department"].interesting_values = [
'produce', 'dairy eggs', 'snacks', 'beverages', 'frozen', 'pantry',
'bakery', 'canned goods', 'deli', 'dry goods pasta'
]
# Same treatment for a hard-coded list of product names.
es["order_products"]["product_name"].interesting_values = [
'Banana', 'Bag of Organic Bananas', 'Organic Baby Spinach',
'Organic Strawberries', 'Organic Hass Avocado', 'Organic Avocado',
'Large Lemon', 'Limes', 'Strawberries', 'Organic Whole Milk'
]
# Hand the populated entity set back to the caller.
return es
# Rename the dataframe's columns by passing every name in `columns` through
# EntitySpark.recover_col_name for this entity.
# NOTE(review): this line uses `entity_id` while the call below uses
# `entity.entity_id` - presumably the same value; confirm in the full file.
df.columns = [EntitySpark.recover_col_name(entity_id, col) for col in columns]
# Register the renamed dataframe under the entity's id. The index, time index
# and secondary time index names go through the same recover_col_name mapping;
# variable types are taken from the entity unchanged.
es.entity_from_dataframe(entity_id=entity.entity_id,
dataframe=df,
index=EntitySpark.recover_col_name(entity_id, entity.index),
variable_types=entity.variable_types,
time_index=EntitySpark.recover_col_name(entity_id, entity.time_index),
secondary_time_index=EntitySpark.recover_col_name(entity_id,
entity.secondary_time_index))
# Re-create each relationship on `es`, looking the parent and child variables
# up by their recovered column names.
for relationship in relationships:
parent_entity = relationship.parent_variable.entity_id
parent_col = EntitySpark.recover_col_name(parent_entity, relationship.parent_variable.column_name)
child_entity = relationship.child_variable.entity_id
child_col = EntitySpark.recover_col_name(child_entity, relationship.child_variable.column_name)
es.add_relationship(ft.Relationship(es[parent_entity][parent_col],
es[child_entity][child_col]))
# Run ft.dfs on the rebuilt entity set. Every setting is forwarded from
# same-named variables except cutoff_time_in_index, which is fixed to False.
# The two return values are bound to feature_matrix and feature_dfs.
feature_matrix, feature_dfs = ft.dfs(entityset=es,
agg_primitives=agg_primitives,
trans_primitives=trans_primitives,
target_entity=target_entity,
cutoff_time=cutoff_time,
cutoff_time_in_index=False,
n_jobs=n_jobs,
max_depth=max_depth,
training_window=training_window,
approximate=approximate,
chunk_size=chunk_size)
# Reset the index in place.
# NOTE(review): assumes feature_matrix is a pandas DataFrame, in which case
# the index becomes ordinary column(s) - confirm.
feature_matrix.reset_index(inplace=True)
# Remember which column identifies the entity each row belongs to.
self.entity_col = entity_col
# Create an EntitySet whose id is `name`.
self.es = ft.EntitySet(id=name)
# Load `df` as an entity that is also called `name`. The result of the call
# is re-assigned to self.es.
# NOTE(review): index="__id__" with make_index=True presumably asks
# featuretools to create that index column rather than expect it in df -
# confirm against the library docs.
self.es = self.es.entity_from_dataframe(entity_id=name,
dataframe=df,
time_index=time_col,
index="__id__",
make_index=True,
variable_types=variable_types
)
# Build a one-column frame with one row per distinct value of df[entity_col].
# The values pass through set(), so the row order is not defined.
entity_df = pd.DataFrame([[i] for i in set(df[entity_col])], columns=[entity_col])
# Register that frame as a second entity, named after and indexed by entity_col.
self.es = self.es.entity_from_dataframe(entity_id=entity_col,
dataframe=entity_df,
index=entity_col
)
# Join the two entities on entity_col: the per-value entity's variable is
# passed first and the `name` entity's variable second.
new_relationship = ft.Relationship(self.es[entity_col][entity_col],
self.es[name][entity_col])
self.es = self.es.add_relationship(new_relationship)
def from_dictionary(cls, arguments, entityset, dependencies, primitives_deserializer):
    """Rebuild a feature from its serialized ``arguments`` dictionary.

    The first parameter is named ``cls`` and is called as a constructor; any
    ``@classmethod`` decorator is outside the visible excerpt.

    Args:
        arguments: Mapping read for the keys ``'base_feature'``,
            ``'relationship'`` and ``'name'``.
        entityset: Passed through to ``Relationship.from_dictionary``.
        dependencies: Mapping from the value stored under
            ``arguments['base_feature']`` to the base feature object.
        primitives_deserializer: Accepted as part of the signature; not used
            by this implementation.

    Returns:
        The object produced by calling ``cls`` with the base feature, the
        relationship's child entity, the relationship and the stored name.
    """
    # Lookups are plain subscripts, so a missing key propagates to the caller.
    source_feature = dependencies[arguments['base_feature']]
    link = Relationship.from_dictionary(arguments['relationship'], entityset)
    init_kwargs = dict(
        base_feature=source_feature,
        child_entity=link.child_entity,
        relationship=link,
        name=arguments['name'],
    )
    return cls(**init_kwargs)
# Register the "sessions" entity from sessions_df, indexed by "session_id"
# with "session_start" as its time index.
es = es.entity_from_dataframe(entity_id="sessions",
dataframe=sessions_df,
index="session_id",
time_index="session_start")
# Register the "customers" entity, indexed by "customer_id" with "join_date"
# as its time index; "zip_code" is explicitly typed as ZIPCode.
es = es.entity_from_dataframe(entity_id="customers",
dataframe=customers_df,
index="customer_id",
time_index="join_date",
variable_types={"zip_code": ZIPCode})
# Three joins, each listing one entity's index variable first and the
# matching column of another entity second:
# products -> transactions, sessions -> transactions, customers -> sessions.
# The "products" and "transactions" entities are registered outside this excerpt.
rels = [ft.Relationship(es["products"]["product_id"],
es["transactions"]["product_id"]),
ft.Relationship(es["sessions"]["session_id"],
es["transactions"]["session_id"]),
ft.Relationship(es["customers"]["customer_id"],
es["sessions"]["customer_id"])]
es = es.add_relationships(rels)
# Called with no arguments, so the library defaults apply.
es.add_last_time_indexes()
# Hand the populated entity set back to the caller.
return es
# Return the four dataframes in a dict keyed by "customers", "sessions",
# "transactions" and "products".
return {"customers": customers_df,
"sessions": sessions_df,
"transactions": transactions_df,
"products": products_df}
index="product_id")
# Register the "sessions" entity from sessions_df, indexed by "session_id"
# with "session_start" as its time index.
es = es.entity_from_dataframe(entity_id="sessions",
dataframe=sessions_df,
index="session_id",
time_index="session_start")
# Register the "customers" entity, indexed by "customer_id" with "join_date"
# as its time index; "zip_code" is explicitly typed as ZIPCode.
es = es.entity_from_dataframe(entity_id="customers",
dataframe=customers_df,
index="customer_id",
time_index="join_date",
variable_types={"zip_code": ZIPCode})
# Three joins, each listing one entity's index variable first and the
# matching column of another entity second:
# products -> transactions, sessions -> transactions, customers -> sessions.
# The "transactions" entity is registered outside this excerpt.
rels = [ft.Relationship(es["products"]["product_id"],
es["transactions"]["product_id"]),
ft.Relationship(es["sessions"]["session_id"],
es["transactions"]["session_id"]),
ft.Relationship(es["customers"]["customer_id"],
es["sessions"]["customer_id"])]
es = es.add_relationships(rels)
# Called with no arguments, so the library defaults apply.
es.add_last_time_indexes()
# Hand the populated entity set back to the caller.
return es
# Return the four dataframes in a dict keyed by "customers", "sessions",
# "transactions" and "products".
return {"customers": customers_df,
"sessions": sessions_df,
"transactions": transactions_df,
"products": products_df}