Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_params_same_from_sim_data(self, bbgb_params):
    """Check that fitting simulated data recovers the generating parameters.

    Data is simulated with ``beta_geometric_beta_binom_model`` using
    ``bbgb_params``, collapsed into one row per distinct
    (frequency, recency, n_periods) combination with a ``weights`` count,
    and fitted with ``BetaGeoBetaBinomFitter``.

    NOTE(review): the comparison is positional, so it assumes
    ``bbgb_params`` yields its values in alpha, beta, gamma, delta order --
    confirm against the fixture.
    """
    simulated = beta_geometric_beta_binom_model(N=6, size=100000, **bbgb_params)
    fitter = estimation.BetaGeoBetaBinomFitter()

    # Count customers sharing the same (frequency, recency, n_periods)
    # triple; that count is passed to the fitter as the row weight.
    group_keys = ['frequency', 'recency', 'n_periods']
    counts = simulated.groupby(group_keys)['customer_id'].count()
    weighted = counts.reset_index().rename(columns={'customer_id': 'weights'})

    fitter.fit(
        weighted['frequency'],
        weighted['recency'],
        weighted['n_periods'],
        weighted['weights'],
    )

    generating = np.asarray(list(bbgb_params.values())).astype(float)
    recovered = np.asarray(
        fitter._unload_params('alpha', 'beta', 'gamma', 'delta')
    ).astype(float)
    npt.assert_allclose(generating, recovered, atol=0.1, rtol=1e-2)
def test_params_out_is_close_to_Hardie_paper(self, donations):
    """Check fitted BG/BB parameters against fixed reference values.

    Fits ``BetaGeoBetaBinomFitter`` on the ``donations`` fixture (columns
    ``frequency``, ``recency``, ``periods``, ``weights``) and compares the
    fitted alpha, beta, gamma, delta to hard-coded reference numbers at two
    decimal places. Per the test name the reference numbers come from the
    Hardie paper -- the source itself is not cited here.
    """
    bbtf = estimation.BetaGeoBetaBinomFitter()
    bbtf.fit(
        donations['frequency'],
        donations['recency'],
        donations['periods'],
        donations['weights'],
    )

    # Reference values in alpha, beta, gamma, delta order.
    expected = np.array([1.204, 0.750, 0.657, 2.783])
    fitted = np.array(bbtf._unload_params('alpha', 'beta', 'gamma', 'delta'))
    npt.assert_array_almost_equal(expected, fitted, decimal=2)
def test_scaling_inputs_gives_same_or_similar_results(self, cdnow_customers):
    """Check that multiplying recency and T by a constant leaves predictions unchanged.

    One ``BetaGeoFitter`` is fitted on the raw data and another on data
    whose ``recency`` and ``T`` are multiplied by ``scale``. The scaled
    fitter must report ``_scale < 1`` and both must agree on
    ``conditional_probability_alive`` for correspondingly scaled inputs.
    """
    frequency = cdnow_customers['frequency']
    recency = cdnow_customers['recency']
    T = cdnow_customers['T']

    bgf = estimation.BetaGeoFitter()
    bgf.fit(frequency, recency, T)

    scale = 10
    bgf_with_large_inputs = estimation.BetaGeoFitter()
    bgf_with_large_inputs.fit(frequency, scale * recency, scale * T, iterative_fitting=2)
    assert bgf_with_large_inputs._scale < 1.

    # (frequency, recency, T) triples in the unscaled units.
    for freq, rec, age in ((1, 1, 2), (1, 2, 10)):
        scaled_prob = bgf_with_large_inputs.conditional_probability_alive(
            freq, scale * rec, scale * age
        )
        baseline_prob = bgf.conditional_probability_alive(freq, rec, age)
        assert abs(scaled_prob - baseline_prob) < 10e-5
def test_fit_with_index(self, cdnow_customers):
    """Check that ``ParetoNBDFitter.fit`` honours an explicit ``index`` argument.

    The fitter is first given a reversed range as index and the stored
    ``data`` frame must carry exactly that index. It is then fitted again
    with ``index=None``.
    """
    ptf = estimation.ParetoNBDFitter()
    # Descending range len(cdnow_customers)..1 used as the custom index.
    index = range(len(cdnow_customers), 0, -1)
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=index
    )
    # Plain truthiness check; no need to compare the result with ``True``.
    assert (ptf.data.index == index).all()

    # No assertion follows this call, so it only verifies that fitting
    # with ``index=None`` completes without raising.
    ptf = estimation.ParetoNBDFitter()
    ptf.fit(
        cdnow_customers['frequency'],
        cdnow_customers['recency'],
        cdnow_customers['T'],
        index=None
    )
def test_probability_of_n_purchases_up_to_time_same_as_R_BTYD(self):
    """Compare ``probability_of_n_purchases_up_to_time`` with hard-coded reference values.

    See https://cran.r-project.org/web/packages/BTYD/BTYD.pdf

    The fitter is not fitted; its ``params_`` are set directly.
    """
    mbgf = estimation.ModifiedBetaGeoFitter()
    mbgf.params_ = OrderedDict({'r': 0.243, 'alpha': 4.414, 'a': 0.793, 'b': 2.426})

    # Probability that a customer makes 10 repeat transactions in the
    # time interval (0, 2].
    # NOTE(review): the tolerance 10e-5 (= 1e-4) is about three orders of
    # magnitude larger than the expected value, so this only bounds the
    # result near zero rather than pinning it.
    single_expected = 1.07869e-07
    single_actual = mbgf.probability_of_n_purchases_up_to_time(2, 10)
    assert abs(single_expected - single_actual) < 10e-5

    # PMF over n = 11..20 purchases up to time 30.
    pmf_expected = np.array([
        0.0019995214, 0.0015170236, 0.0011633150, 0.0009003148, 0.0007023638,
        0.0005517902, 0.0004361913, 0.0003467171, 0.0002769613, 0.0002222260,
    ])
    pmf_actual = np.array(
        [mbgf.probability_of_n_purchases_up_to_time(30, n) for n in range(11, 21)]
    )
    npt.assert_allclose(pmf_expected, pmf_actual, rtol=0.5)