Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def inactive_membership(transactions, window):
    """Return how long a membership stayed inactive after it expired.

    The rows are ordered by ``transaction_date`` and the
    ``membership_expire_date`` of the earliest row marks the start of the
    inactive period. The period ends at the second-earliest
    ``transaction_date`` or, when the slice holds exactly one row, at the
    end of the window.

    Args:
        transactions: DataFrame with ``transaction_date`` and
            ``membership_expire_date`` columns. It must contain at least
            one row; an empty frame raises ``IndexError`` from ``.iloc[0]``.
        window: Two-item sequence ``(cutoff_time, end_time)``. Only
            ``end_time`` is read, and only for single-row slices.

    Returns:
        The end of the inactive period minus the first row's
        ``membership_expire_date``. The value is negative when the end
        point precedes the expiry date.
    """
    # sort_values returns a new frame, so the caller's data is not reordered.
    ordered = transactions.sort_values('transaction_date')
    first_expiry = ordered['membership_expire_date'].iloc[0]

    if len(ordered) == 1:
        # No later transaction in this slice: measure up to the window end.
        _, end_time = window
        return end_time - first_expiry

    # Otherwise the inactive period ends at the next transaction.
    return ordered['transaction_date'].iloc[1] - first_expiry
# Configure a label maker that applies `inactive_membership` per `msno`,
# using `lead_time` as the time index and a window size of '100d'.
label_maker = cp.LabelMaker(
    target_entity='msno',
    time_index='lead_time',
    labeling_function=inactive_membership,
    window_size='100d',
)

# +
# NOTE(review): `now` is not read anywhere in this excerpt.
now = pd.Timestamp.now()

# Run the label search over `transactions` with the settings below.
label_times = label_maker.search(
    transactions,
    minimum_data=0,
    num_examples_per_instance=2,
    gap=1,
    verbose=True,
)
from composeml import LabelMaker
from featuretools.demo import load_mock_customer
# Load the featuretools mock-customer demo data. return_single_table=True
# requests one combined table (presumably a single pandas DataFrame --
# confirm against the featuretools documentation).
full_df = load_mock_customer(return_single_table=True)
def my_labeling_function(df_slice, threshold=80):
    """Label a data slice by whether its mean ``amount`` exceeds a threshold.

    Args:
        df_slice: DataFrame-like object exposing an ``amount`` column.
        threshold: Value the mean must strictly exceed. Defaults to 80,
            the value that was previously hard-coded, so existing callers
            get the same labels.

    Returns:
        The result of ``df_slice["amount"].mean() > threshold``.
    """
    return df_slice["amount"].mean() > threshold
# Configure a label maker keyed on `customer_id`, with `transaction_time`
# as the time index and a window size of "2h".
lm = LabelMaker(
    target_entity="customer_id",
    time_index="transaction_time",
    labeling_function=my_labeling_function,
    window_size="2h",
)

# Run the label search over the demo data with the settings below.
lt = lm.search(
    full_df,
    minimum_data="1h",
    num_examples_per_instance=2,
    gap="2h",
)

# Bare expression: only shows the result in an interactive session/notebook.
lt
# flake8:noqa
import composeml as cp
def my_labeling_function(df_slice):
    """Return the mean ``voltage`` of one data slice.

    ``df_slice`` is one slice of data inside of the prediction window for a
    single instance of the target entity.
    """
    return df_slice["voltage"].mean()
# TODO: name
lm = cp.LabelMaker(
    target_entity="machine_id",
    time_index="timestamp",
    labeling_function=my_labeling_function,
    window_size="7 days",
)

# Describe the parameters to search for the labels.
# Returns a LabelTimes object, which is basically a pandas dataframe.
lt = lm.search(
    dataframe=full_df,
    minimum_data="20 days",  # minimum data before starting search
    num_examples_per_instance=10,  # examples per unique instance of target entity
    gap="7 days",  # time between examples
)

lt.summarize()  # prints out distribution of labels
lt.describe()  # prints out all the settings used to make the labels
# functions to modify label times and return a new copy of label times