# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self, cdnow_customers):
    """Check the conditional purchase expectation against a fixed reference value.

    Fits a ``ModifiedBetaGeoFitter`` on the ``frequency``, ``recency`` and
    ``T`` columns of ``cdnow_customers`` and requires the prediction for one
    hand-picked customer to be within 0.05 of 1.226 (going by the test name,
    the reference figure is taken from Hardie's Excel sheet).
    """
    fitter = estimation.ModifiedBetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    # Customer profile (x, t_x, T) and horizon t used for the comparison.
    frequency, recency, age = 2, 30.43, 38.86
    horizon = 39
    reference = 1.226

    predicted = fitter.conditional_expected_number_of_purchases_up_to_time(
        horizon, frequency, recency, age
    )
    assert abs(reference - predicted) < 0.05
def test_scaling_inputs_gives_same_or_similar_results(self, cdnow_customers):
    """Fit on raw and on 10x-scaled time inputs and compare alive probabilities.

    A baseline fitter is trained on the unscaled columns; a second fitter is
    trained with ``recency`` and ``T`` multiplied by ``scale`` (and
    ``iterative_fitting=2``). The test then requires the second fitter's
    ``_scale`` attribute to be below 1 and the alive probabilities of both
    fitters to agree within 0.1 for two customer profiles.
    """
    scale = 10.

    baseline = estimation.ModifiedBetaGeoFitter()
    baseline.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    scaled = estimation.ModifiedBetaGeoFitter()
    scaled.fit(
        cdnow_customers['frequency'],
        scale * cdnow_customers['recency'],
        scale * cdnow_customers['T'],
        iterative_fitting=2,
    )
    assert scaled._scale < 1.

    # (frequency, recency, T) profiles; time-like values are scaled for the
    # fitter that was trained on scaled inputs.
    for frequency, recency, age in ((1, 1, 2), (1, 2, 10)):
        p_scaled = scaled.conditional_probability_alive(frequency, scale * recency, scale * age)
        p_baseline = baseline.conditional_probability_alive(frequency, recency, age)
        assert abs(p_scaled - p_baseline) < 10e-2
def test_conditional_probability_alive_returns_lessthan_1_if_no_repeat_purchases(self, cdnow_customers):
    """A customer with frequency 0 must get an alive probability strictly below 1.

    The fitter is trained on the ``frequency``, ``recency`` and ``T`` columns
    of ``cdnow_customers`` and queried with ``(0, 1, 1)``.
    """
    fitter = estimation.ModifiedBetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    p_alive = fitter.conditional_probability_alive(0, 1, 1)
    assert p_alive < 1.0
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
    """Compare 30-day purchase predictions from daily and hourly summaries.

    The same transaction log is summarised once with ``freq='D'`` and once
    with ``freq='h'``. One fitter is trained on each summary in turn (with the
    NumPy seed reset to 0 before every fit), and the expected purchases over
    30 days (expressed as 30 * 24 periods for the hourly fit) must match
    under ``npt.assert_almost_equal``.
    """
    transactions = load_transaction_data(parse_dates=['date'])
    period_end = max(transactions.date)

    by_day = utils.summary_data_from_transaction_data(
        transactions, 'id', 'date', observation_period_end=period_end, freq='D'
    )
    by_hour = utils.summary_data_from_transaction_data(
        transactions, 'id', 'date', observation_period_end=period_end, freq='h'
    )

    horizon_days = 30
    horizon_hours = horizon_days * 24

    fitter = estimation.ModifiedBetaGeoFitter()

    # Reset the seed before each fit so both fits run from the same RNG state.
    np.random.seed(0)
    fitter.fit(by_day['frequency'], by_day['recency'], by_day['T'])
    prediction_from_days = fitter.expected_number_of_purchases_up_to_time(horizon_days)

    np.random.seed(0)
    fitter.fit(by_hour['frequency'], by_hour['recency'], by_hour['T'])
    prediction_from_hours = fitter.expected_number_of_purchases_up_to_time(horizon_hours)

    npt.assert_almost_equal(prediction_from_days, prediction_from_hours)
def test_fit_method_allows_for_better_accuracy_by_using_iterative_fitting(self, cdnow_customers):
    """Iterative fitting must not yield a worse negative log-likelihood.

    Two fitters are trained on the same columns from the same NumPy seed; the
    second one uses ``iterative_fitting=5``. Its ``_negative_log_likelihood_``
    has to be less than or equal to that of the default fit.

    NOTE(review): a second definition with this exact name appears later in
    this file. If both live in the same scope, the later one rebinds the name
    and this copy never runs - confirm and remove one of them.
    """
    single_pass = estimation.ModifiedBetaGeoFitter()
    multi_pass = estimation.ModifiedBetaGeoFitter()

    np.random.seed(0)
    single_pass.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    np.random.seed(0)
    multi_pass.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        iterative_fitting=5,
    )

    baseline_nll = single_pass._negative_log_likelihood_
    refined_nll = multi_pass._negative_log_likelihood_
    assert baseline_nll >= refined_nll
def test_mgbf_does_not_hang_for_small_datasets_but_can_be_improved_with_iterative_fitting(self, cdnow_customers):
    """Fit on a two-row dataset with and without iterative fitting.

    Only the first two rows of ``cdnow_customers`` are used. Both fits start
    from NumPy seed 0; the fit with ``iterative_fitting=10`` must end with a
    ``_negative_log_likelihood_`` no larger than the default fit.
    """
    tiny = cdnow_customers.iloc[:2]

    default_fit = estimation.ModifiedBetaGeoFitter()
    iterated_fit = estimation.ModifiedBetaGeoFitter()

    np.random.seed(0)
    default_fit.fit(tiny['frequency'], tiny['recency'], tiny['T'])

    np.random.seed(0)
    iterated_fit.fit(tiny['frequency'], tiny['recency'], tiny['T'], iterative_fitting=10)

    default_nll = default_fit._negative_log_likelihood_
    iterated_nll = iterated_fit._negative_log_likelihood_
    assert default_nll >= iterated_nll
def test_conditional_probability_alive_matrix(self, cdnow_customers):
    """Every matrix cell must equal the direct alive-probability call.

    After fitting, the matrix returned by
    ``conditional_probability_alive_matrix()`` is walked cell by cell. The
    first index is passed as the recency argument, the second as the
    frequency argument, and the integer maximum of the fitted data's ``T``
    column as the third argument. Equality is exact.
    """
    fitter = estimation.ModifiedBetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    alive_matrix = fitter.conditional_probability_alive_matrix()
    longest_age = int(fitter.data['T'].max())

    for recency in range(alive_matrix.shape[0]):
        for frequency in range(alive_matrix.shape[1]):
            cell = alive_matrix[recency][frequency]
            assert cell == fitter.conditional_probability_alive(frequency, recency, longest_age)
def test_conditional_probability_alive_is_between_0_and_1(self, cdnow_customers):
    """Alive probabilities must lie in [0, 1] over a coarse grid of inputs.

    The first two arguments each sweep 0, 10, ..., 90; the third argument
    starts at the second argument's value and runs up to 90 in steps of 10.
    """
    fitter = estimation.ModifiedBetaGeoFitter()
    fitter.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
    )

    grid = range(0, 100, 10)
    for frequency in grid:
        for recency in grid:
            # The third argument never drops below the second one.
            for age in range(recency, 100, 10):
                p_alive = fitter.conditional_probability_alive(frequency, recency, age)
                assert 0 <= p_alive <= 1.0
def test_fit_method_allows_for_better_accuracy_by_using_iterative_fitting(self, cdnow_customers):
    """Compare a default fit with a fit that uses ``iterative_fitting=5``.

    Both fitters see identical columns and both fits are preceded by
    ``np.random.seed(0)``. The default fit's ``_negative_log_likelihood_``
    must be greater than or equal to the iterative fit's value.

    NOTE(review): an earlier definition with this exact name exists in this
    file. If both share a scope, this later one silently replaces it -
    confirm and drop the redundant copy.
    """
    columns = ('frequency', 'recency', 'T')

    plain = estimation.ModifiedBetaGeoFitter()
    iterative = estimation.ModifiedBetaGeoFitter()

    np.random.seed(0)
    plain.fit(*(cdnow_customers[name] for name in columns))

    np.random.seed(0)
    iterative.fit(*(cdnow_customers[name] for name in columns), iterative_fitting=5)

    assert plain._negative_log_likelihood_ >= iterative._negative_log_likelihood_
def test_sum_of_scalar_inputs_to_negative_log_likelihood_is_equal_to_array(self):
    """Per-customer likelihood calls must add up to the array-valued call.

    ``_negative_log_likelihood`` is invoked on the class itself (no fitted
    instance), once per customer with one-element arrays and once with the
    full arrays. The sum of the per-customer results must equal the array
    result multiplied by the number of customers.

    NOTE(review): the multiplication by the customer count suggests the
    array form returns a per-customer average - confirm against the
    implementation.
    """
    mbgf = estimation.ModifiedBetaGeoFitter
    x = np.array([1, 3])
    t_x = np.array([2, 2])
    t = np.array([5, 6])
    weights = np.array([1, 1])
    params = [1, 1, 1, 1]

    # Pair every customer with its OWN weight. The previous version passed
    # weights[0] for the second customer as well, which only went unnoticed
    # because both weights are 1.
    per_customer_total = sum(
        mbgf._negative_log_likelihood(
            params, np.array([x_i]), np.array([t_x_i]), np.array([t_i]), w_i, 0
        )
        for x_i, t_x_i, t_i, w_i in zip(x, t_x, t, weights)
    )

    combined = mbgf._negative_log_likelihood(params, x, t_x, t, weights, 0)
    assert per_customer_total == len(x) * combined