        lb = AnchorBaseBeam.dlow_bernoulli(mean, beta / state['t_nsamples'][t])
        ub = AnchorBaseBeam.dup_bernoulli(mean, beta / state['t_nsamples'][t])
        coverage = state['t_coverage'][t]
        if verbose:
            print(i, mean, lb, ub)
        # while (prec(A) >= tau and prec_lb(A) < tau - eps) or (prec(A) < tau and prec_ub(A) >= tau + eps):
        # sample more data and update the lower and upper precision bounds, because ...
        # ... the bound relevant to the respective case (lb or ub) is not yet tight enough to decide
        while ((mean >= desired_confidence and lb < desired_confidence - epsilon_stop) or
               (mean < desired_confidence and ub >= desired_confidence + epsilon_stop)):
            # sample a batch of data and recompute the precision mean, lb and ub
            sample_fns[i](batch_size)
            mean = state['t_positives'][t] / state['t_nsamples'][t]
            lb = AnchorBaseBeam.dlow_bernoulli(mean, beta / state['t_nsamples'][t])
            ub = AnchorBaseBeam.dup_bernoulli(mean, beta / state['t_nsamples'][t])
        if verbose:
            print('%s mean = %.2f lb = %.2f ub = %.2f coverage: %.2f n: %d' %
                  (t, mean, lb, ub, coverage, state['t_nsamples'][t]))
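        # Worked example of the stopping rule above (illustrative numbers): with
        # tau = desired_confidence = 0.95 and epsilon_stop = 0.05, a candidate with
        # mean = 0.97 keeps sampling until lb >= tau - epsilon_stop = 0.90 and is then
        # accepted as eligible below, while a candidate with mean = 0.80 keeps sampling
        # until ub < tau + epsilon_stop = 1.00 and is then rejected.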
        # if prec(A) >= tau and prec_lb(A) > tau - eps, then we found an eligible anchor
        if mean >= desired_confidence and lb > desired_confidence - epsilon_stop:
            if verbose:
                print('Found eligible anchor ', t, 'Coverage:',
                      coverage, 'Is best?', coverage > best_coverage)
            # the coverage of an eligible anchor needs to beat the current best coverage
            if coverage > best_coverage:
                best_coverage = coverage
                best_tuple = t
                if best_coverage == 1 or stop_on_first:
                    stop_this = True
    if stop_this:
        break
    current_size += 1
# if no anchor is found, choose the highest precision among the best anchor candidates from every round
if best_tuple == ():
    if verbose:
        print('Could not find an anchor, now doing best of each size')
    tuples = []
    for i in range(0, current_size):
        tuples.extend(best_of_size[i])
    sample_fns = AnchorBaseBeam.get_sample_fns(sample_fn, tuples, state, data_type=dtype)
    initial_stats = AnchorBaseBeam.get_initial_statistics(tuples, state)
    chosen_tuples = AnchorBaseBeam.lucb(sample_fns, initial_stats, epsilon,
                                        delta, batch_size, 1, verbose=verbose)
    best_tuple = tuples[chosen_tuples[0]]
# return explanation dictionary
return AnchorBaseBeam.get_anchor_from_tuple(best_tuple, state)
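# The dlow_bernoulli/dup_bernoulli calls above compute KL confidence bounds on a
# Bernoulli mean, as in KL-LUCB (Kaufmann & Kalyanakrishnan, 2013). A minimal,
# self-contained sketch of that construction; kl_bernoulli and
# dup_bernoulli_sketch are illustrative stand-ins, not necessarily this class's
# exact methods:

import numpy as np

def kl_bernoulli(p: float, q: float) -> float:
    # binary KL divergence KL(p || q), clipped away from 0 and 1 to avoid log(0)
    p = min(max(p, 1e-7), 1. - 1e-7)
    q = min(max(q, 1e-7), 1. - 1e-7)
    return p * np.log(p / q) + (1. - p) * np.log((1. - p) / (1. - q))

def dup_bernoulli_sketch(p: float, level: float, n_iter: int = 17) -> float:
    # upper bound: the largest q >= p with KL(p || q) <= level, found by bisection
    # (the lower bound dlow is symmetric, searching over q <= p)
    lo, hi = p, 1.
    for _ in range(n_iter):
        mid = (lo + hi) / 2.
        if kl_bernoulli(p, mid) > level:
            hi = mid
        else:
            lo = mid
    return lo

# the bound tightens as the per-candidate budget level = beta / n shrinks:
print(dup_bernoulli_sketch(0.9, 2.0 / 100), dup_bernoulli_sketch(0.9, 2.0 / 1000))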
def update_bounds(t: int) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parameters
    ----------
    t
        Iteration number.

    Returns
    -------
    Upper and lower precision bound indices.
    """
    sorted_means = np.argsort(means)  # ascending sort of anchor candidates by precision
    beta = AnchorBaseBeam.compute_beta(n_features, t, delta)

    # J = the beam width top anchor candidates with highest precision
    # not_J = the rest
    J = sorted_means[-top_n:]
    not_J = sorted_means[:-top_n]

    for f in not_J:  # update upper bound for lowest precision anchor candidates
        ub[f] = AnchorBaseBeam.dup_bernoulli(means[f], beta / n_samples[f])
    for f in J:  # update lower bound for highest precision anchor candidates
        lb[f] = AnchorBaseBeam.dlow_bernoulli(means[f], beta / n_samples[f])

    # among the low precision anchor candidates, keep the index of the candidate ...
    # ... with the highest upper precision bound -> ut
    # among the high precision anchor candidates, keep the index of the candidate ...
    # ... with the lowest lower precision bound -> lt
    ut = not_J[np.argmax(ub[not_J])]
    lt = J[np.argmin(lb[J])]
    return ut, lt
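# update_bounds is the arm-selection step of a (KL-)LUCB loop: repeatedly sample
# the two most ambiguous candidates until the top ones are separated with
# confidence. A self-contained, hedged sketch of that outer loop on simulated
# Bernoulli candidates, using a Hoeffding-style stand-in for the KL bounds (all
# names and constants here are illustrative, not this library's API):

import numpy as np

rng = np.random.default_rng(0)
true_prec = np.array([0.6, 0.8, 0.9])  # hidden precision of 3 candidate anchors
top_n, batch_size, epsilon, beta = 1, 10, 0.1, 2.0

n_samples = np.full(3, float(batch_size))
positives = np.array([rng.binomial(batch_size, p) for p in true_prec], dtype=float)
means = positives / n_samples

def bounds_and_critical_arms():
    # Hoeffding-style bounds instead of the KL bounds used above
    slack = np.sqrt(beta / (2. * n_samples))
    lb, ub = np.maximum(means - slack, 0.), np.minimum(means + slack, 1.)
    J, not_J = np.argsort(means)[-top_n:], np.argsort(means)[:-top_n]
    ut = not_J[np.argmax(ub[not_J])]  # strongest challenger (highest upper bound)
    lt = J[np.argmin(lb[J])]          # weakest top candidate (lowest lower bound)
    return lb, ub, ut, lt

lb, ub, ut, lt = bounds_and_critical_arms()
while ub[ut] - lb[lt] > epsilon:
    for f in (ut, lt):  # sample only the two critical arms
        positives[f] += rng.binomial(batch_size, true_prec[f])
        n_samples[f] += batch_size
    means = positives / n_samples
    lb, ub, ut, lt = bounds_and_critical_arms()
print('chosen candidate(s):', np.argsort(means)[-top_n:])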
    't_coverage': collections.defaultdict(lambda: 0.),
    'coverage_data': coverage_data,
    't_order': collections.defaultdict(lambda: list())
}
current_size = 1
best_of_size = {0: []}  # type: Dict[int, list]
best_coverage = -1
best_tuple = ()
if max_anchor_size is None:
    max_anchor_size = n_features

# find the best anchor using beam search, up to the maximum anchor size
while current_size <= max_anchor_size:
    # create new candidate anchors by adding features to the current best anchors
    tuples = AnchorBaseBeam.make_tuples(best_of_size[current_size - 1], state)
    # goal: maximize coverage under the precision constraint P(prec(A) >= tau) >= 1 - delta (eq. 4)
    # so only keep tuples with higher coverage than the current best coverage
    tuples = [x for x in tuples if state['t_coverage'][x] > best_coverage]
    # if adding features does not improve on the best coverage -> break
    if len(tuples) == 0:
        break
    # build a sample function for each tuple in the tuples list;
    # these functions sample randomly for all features except those in the candidate anchor;
    # for features in the anchor, they use the same category (categorical features) or sample from ...
    # ... the same bin (discretized numerical features) as the feature in the observation being
    # explained (sketched in the standalone example below)
    sample_fns = AnchorBaseBeam.get_sample_fns(sample_fn, tuples, state, data_type=dtype)
    # for each tuple, get the initial number of samples used and prec(A)
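# The sample-function comments above describe conditional sampling: features
# outside the anchor are drawn at random, features inside it are pinned to the
# explained instance's category/bin. A minimal, self-contained sketch of such a
# sample function over discretized features (make_sample_fn is a hypothetical
# helper, not this library's get_sample_fns):

import numpy as np

def make_sample_fn(train_disc: np.ndarray, instance_disc: np.ndarray, anchor: tuple):
    rng = np.random.default_rng(0)

    def sample_fn(n: int) -> np.ndarray:
        # each feature drawn independently from its (discretized) training marginal
        cols = [rng.choice(train_disc[:, j], size=n) for j in range(train_disc.shape[1])]
        samples = np.stack(cols, axis=1)
        # features in the anchor are fixed to the instance's bin/category
        samples[:, list(anchor)] = instance_disc[list(anchor)]
        return samples

    return sample_fn

# e.g. anchor (0, 2) fixes features 0 and 2 to the explained instance's values:
train = np.array([[0, 1, 2], [1, 0, 2], [0, 2, 1], [1, 1, 0]])
sample_anchor_02 = make_sample_fn(train, train[0], anchor=(0, 2))
print(sample_anchor_02(5))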