Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the body of a test whose `def` line is outside the
# visible fragment; `ggf` and `cdnow_customers_with_monetary_value` are
# defined elsewhere (presumably a fitted Gamma-Gamma model and a fixture
# DataFrame — confirm against the enclosing test).

# Fit a BetaGeoFitter on the frequency / recency / T columns.
bgf = estimation.BetaGeoFitter()
bgf.fit(cdnow_customers_with_monetary_value['frequency'],
cdnow_customers_with_monetary_value['recency'],
cdnow_customers_with_monetary_value['T'],
iterative_fitting=3)
# Customer lifetime value via the public method on `ggf`, which receives the
# raw monetary_value column as its last argument.
ggf_clv = ggf.customer_lifetime_value(
bgf,
cdnow_customers_with_monetary_value['frequency'],
cdnow_customers_with_monetary_value['recency'],
cdnow_customers_with_monetary_value['T'],
cdnow_customers_with_monetary_value['monetary_value']
)
# Same inputs passed to the private utils helper, except that the last
# argument is the output of ggf.conditional_expected_average_profit(...)
# rather than the raw monetary_value column.
utils_clv = utils._customer_lifetime_value(
bgf,
cdnow_customers_with_monetary_value['frequency'],
cdnow_customers_with_monetary_value['recency'],
cdnow_customers_with_monetary_value['T'],
ggf.conditional_expected_average_profit(cdnow_customers_with_monetary_value['frequency'],
cdnow_customers_with_monetary_value['monetary_value'])
)
# The two results are compared with assert_equal, i.e. without any tolerance.
npt.assert_equal(ggf_clv.values, utils_clv.values)
def test_plot_calibration_purchases_vs_holdout_purchases_time_since_last_purchase(self, transaction_data, bgf):
    """Check the ``kind='time_since_last_purchase'`` calibration-vs-holdout plot.

    Builds a calibration/holdout summary from ``transaction_data``, fits
    ``bgf`` on the calibration columns, draws the plot and verifies:

    * the y-data of the first two plotted lines (within ``atol=0.01``),
    * the legend texts,
    * the title and both axis labels.

    The figure is closed in a ``finally`` clause so that a failing assertion
    does not leave an open figure behind for subsequent tests.
    """
    holdout_expected = [3.954, 3.431, 3.482, 3.484, 2.75, 2.289, 1.968]
    predictions_expected = [4.345, 2.993, 3.236, 2.677, 2.240, 2.608, 2.430]
    labels = ['frequency_holdout', 'model_predictions']

    summary = utils.calibration_and_holdout_data(transaction_data, 'id', 'date', '2014-09-01', '2014-12-31')
    bgf.fit(summary['frequency_cal'], summary['recency_cal'], summary['T_cal'])

    try:
        ax = plotting.plot_calibration_purchases_vs_holdout_purchases(bgf, summary, kind='time_since_last_purchase')

        lines = ax.lines
        legend = ax.legend_
        # Line 0 is compared against the holdout values, line 1 against the
        # model predictions; get_data() returns (x, y), so [1] is the y-data.
        holdout = lines[0].get_data()[1]
        predictions = lines[1].get_data()[1]

        assert_allclose(holdout, holdout_expected, atol=0.01)
        assert_allclose(predictions, predictions_expected, atol=0.01)
        assert_array_equal([e.get_text() for e in legend.get_texts()], labels)
        assert_equal(ax.title.get_text(), "Actual Purchases in Holdout Period vs Predicted Purchases")
        assert_equal(ax.xaxis.get_label().get_text(), "Time since user made last purchase")
        assert_equal(ax.yaxis.get_label().get_text(), "Average of Purchases in Holdout Period")
    finally:
        # Always release the current figure, even when an assertion above
        # fails; otherwise it stays registered in pyplot's global state.
        plt.close()
def bgf_transactions(cdnow_transactions):
    """Return a ``BetaGeoFitter`` fitted on a summary of ``cdnow_transactions``.

    The transactions are summarised per ``'id_sample'`` using the ``'date'``
    column (parsed with ``'%Y%m%d'``), with ``freq='W'`` and an observation
    period ending at ``'19970930'``. The fitter is created with
    ``penalizer_coef=0.01``.
    """
    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions,
        'id_sample',
        'date',
        datetime_format='%Y%m%d',
        observation_period_end='19970930',
        freq='W',
    )

    fitter = BetaGeoFitter(penalizer_coef=0.01)
    frequency = summary['frequency']
    recency = summary['recency']
    age = summary['T']
    fitter.fit(frequency, recency, age)
    return fitter
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
    """Compare a 30-day purchase prediction from daily vs. hourly summaries.

    The same transaction data is summarised once with ``freq='D'`` and once
    with ``freq='h'``. One ``ModifiedBetaGeoFitter`` is fitted on each summary
    in turn, and the expected number of purchases over 30 periods (daily) is
    compared with the one over ``30 * 24`` periods (hourly) using
    ``npt.assert_almost_equal``.
    """
    transactions = load_transaction_data(parse_dates=['date'])

    summary_by_day = utils.summary_data_from_transaction_data(
        transactions,
        'id',
        'date',
        observation_period_end=max(transactions.date),
        freq='D',
    )
    summary_by_hour = utils.summary_data_from_transaction_data(
        transactions,
        'id',
        'date',
        observation_period_end=max(transactions.date),
        freq='h',
    )

    horizon_days = 30
    hours_per_day = 24
    fitter = estimation.ModifiedBetaGeoFitter()

    # The NumPy RNG is re-seeded before each fit so both fits start from the
    # same random state.
    np.random.seed(0)
    fitter.fit(
        summary_by_day['frequency'],
        summary_by_day['recency'],
        summary_by_day['T'],
    )
    prediction_from_days = fitter.expected_number_of_purchases_up_to_time(horizon_days)

    np.random.seed(0)
    fitter.fit(
        summary_by_hour['frequency'],
        summary_by_hour['recency'],
        summary_by_hour['T'],
    )
    prediction_from_hours = fitter.expected_number_of_purchases_up_to_time(horizon_days * hours_per_day)

    npt.assert_almost_equal(prediction_from_days, prediction_from_hours)
def test_summary_data_from_transaction_data_with_specific_datetime_format(transaction_level_data):
    """Summarise transactions whose dates are given in ``'%Y%m%d'`` form.

    The ``'date'`` column of ``transaction_level_data`` is rewritten in place
    with every ``'-'`` removed, then summarised with an explicit
    ``datetime_format`` and an observation period ending at ``'20150207'``.
    The result must equal the expected frequency / recency / T frame indexed
    by ``'id'``.
    """
    def strip_dashes(value):
        return value.replace('-', '')

    transaction_level_data['date'] = transaction_level_data['date'].map(strip_dashes)

    date_format = '%Y%m%d'
    period_end = '20150207'
    actual = utils.summary_data_from_transaction_data(
        transaction_level_data,
        'id',
        'date',
        observation_period_end=period_end,
        datetime_format=date_format,
    )

    expected_rows = [
        [1, 1., 5., 6.],
        [2, 0., 0., 37.],
        [3, 2., 4., 37.],
    ]
    expected = pd.DataFrame(expected_rows, columns=['id', 'frequency', 'recency', 'T']).set_index('id')

    assert_frame_equal(actual, expected)
def example_summary_data(example_transaction_data):
    """Summarise ``example_transaction_data`` by ``'id'`` and ``'date'``.

    The observation period ends at the maximum value of the frame's ``date``
    column.
    """
    last_date = max(example_transaction_data.date)
    return utils.summary_data_from_transaction_data(
        example_transaction_data,
        'id',
        'date',
        observation_period_end=last_date,
    )