Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_fit_with_index(self, donations):
    """Check that ``fit`` applies an explicit ``index`` to the fitted data.

    The fitter is trained twice on the same ``donations`` columns
    (``frequency``, ``recency``, ``periods``, ``weights``): once with a
    custom index and once with ``index=None``.
    """
    # Descending labels n, n-1, ..., 1 (one per row of ``donations``).
    index = range(len(donations), 0, -1)

    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
        index=index,
    )
    # Every label of the fitted data must equal the supplied index.
    assert (bbtf.data.index == index).all()

    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
        index=None,
    )
    # With no index given, the fitted data must differ from the custom
    # index in at least one position.
    assert not (bbtf.data.index == index).all()
def test_expected_purchases_in_n_periods_returns_same_value_as_Hardie_excel_sheet(self, donations):
    """Total expected from Hardie's In-Sample Fit sheet.

    Fits the model on the weighted ``donations`` data and compares the rows
    labelled 0 and 6 of ``expected_number_of_transactions_in_first_n_periods(6)``
    against the spreadsheet values, to the nearest unit (``decimal=0``).
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )

    expected = np.array([3454.9, 1253.1])  # Cells C18 and C24
    in_sample = bbtf.expected_number_of_transactions_in_first_n_periods(6)
    estimated = in_sample.loc[[0, 6]].values.flatten()

    # numpy's signature is (actual, desired): pass the model output first so
    # a failure report labels the two sides correctly.
    npt.assert_almost_equal(estimated, expected, decimal=0)
def test_fit_with_and_without_weights(self, donations):
exploded_dataset = pd.DataFrame(columns=['frequency', 'recency', 'periods'])
for _, row in donations.iterrows():
exploded_dataset = exploded_dataset.append(
pd.DataFrame(
[[row['frequency'], row['recency'], row['periods']]] * row['weights'],
columns = ['frequency', 'recency', 'periods']
))
exploded_dataset = exploded_dataset.astype(np.int64)
assert exploded_dataset.shape[0] == donations['weights'].sum()
bbtf_noweights = estimation.BetaGeoBetaBinomFitter()
bbtf_noweights.fit(
exploded_dataset['frequency'],
exploded_dataset['recency'],
exploded_dataset['periods'],
)
bbtf = estimation.BetaGeoBetaBinomFitter()
bbtf.fit(
donations['frequency'],
donations['recency'],
donations['periods'],
donations['weights'],
)
npt.assert_array_almost_equal(
np.array(bbtf_noweights._unload_params('alpha','beta','gamma','delta')),
def test_prob_alive_is_close_to_Hardie_paper_table_6(self, donations):
    """Table 6: P(Alive in 2002) as a Function of Recency and Frequency

    Fits the model on the weighted ``donations`` data, stores
    ``conditional_probability_alive(1, ...)`` in a ``prob_alive`` column of
    the fitted data, and checks three (frequency, recency) cells against the
    published values to two decimals.
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )
    bbtf.data['prob_alive'] = bbtf.conditional_probability_alive(
        1, donations['frequency'], donations['recency'], donations['periods']
    )

    # Expected probabilities for last year 1995-0 repeat, 1999-2 repeat, 2001-6 repeat
    expected = np.array([0.11, 0.59, 0.93])
    cells = [(0, 0), (2, 4), (6, 6)]  # (frequency, recency) per expected value

    prob_list = np.zeros(len(cells))
    for pos, (frequency, recency) in enumerate(cells):
        mask = (bbtf.data['frequency'] == frequency) & (bbtf.data['recency'] == recency)
        # Extract the scalar explicitly: assigning a one-element Series to an
        # array slot relies on an implicit conversion that newer NumPy
        # deprecates. ``item()`` still raises unless exactly one row matches.
        prob_list[pos] = bbtf.data.loc[mask, 'prob_alive'].values.item()

    # numpy's signature is (actual, desired): computed values go first.
    npt.assert_array_almost_equal(prob_list, expected, decimal=2)
def test_conditional_expectation_returns_same_value_as_Hardie_excel_sheet(self, donations):
    """
    Total from Hardie's Conditional Expectations (II) sheet.
    http://brucehardie.com/notes/010/BGBB_2011-01-20_XLSX.zip

    Fits the model on the weighted ``donations`` data, multiplies each row's
    ``conditional_expected_number_of_purchases_up_to_time(5, ...)`` by its
    ``weights`` value, and compares the grand total to the spreadsheet figure
    to the nearest unit (``decimal=0``).
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )

    per_row = bbtf.conditional_expected_number_of_purchases_up_to_time(
        5, donations['frequency'], donations['recency'], donations['periods']
    )
    pred_purchases = per_row * donations['weights']
    expected = 12884.2  # Sum of column F Exp Tot

    # numpy's signature is (actual, desired): pass the model total first so
    # a failure report labels the two sides correctly.
    npt.assert_almost_equal(pred_purchases.sum(), expected, decimal=0)
pd.DataFrame(
[[row['frequency'], row['recency'], row['periods']]] * row['weights'],
columns = ['frequency', 'recency', 'periods']
))
exploded_dataset = exploded_dataset.astype(np.int64)
assert exploded_dataset.shape[0] == donations['weights'].sum()
bbtf_noweights = estimation.BetaGeoBetaBinomFitter()
bbtf_noweights.fit(
exploded_dataset['frequency'],
exploded_dataset['recency'],
exploded_dataset['periods'],
)
bbtf = estimation.BetaGeoBetaBinomFitter()
bbtf.fit(
donations['frequency'],
donations['recency'],
donations['periods'],
donations['weights'],
)
npt.assert_array_almost_equal(
np.array(bbtf_noweights._unload_params('alpha','beta','gamma','delta')),
np.array(bbtf._unload_params('alpha','beta','gamma','delta')),
decimal=4
)