Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
vec.fit([doc])
w_spans = get_weighted_spans(
doc, vec,
FeatureWeights(
pos=[FW('see', 2), FW('leaning lemon', 5), FW('lemon tree', 8)],
neg=[FW('tree', -6)]))
assert w_spans == WeightedSpans(
analyzer='word',
document='i see: a leaning lemon tree',
weighted_spans=[
('see', [(2, 5)], 2),
('tree', [(23, 27)], -6),
('leaning lemon', [(9, 16), (17, 22)], 5),
('lemon tree', [(17, 22), (23, 27)], 8)],
other=FeatureWeights(
pos=[FW(hl_in_text, 9)],
neg=[],
))
def test_weighted_spans_word():
    """Check ``get_weighted_spans`` against a word-level ``CountVectorizer``."""
    text = 'I see: a leaning lemon tree'
    vectorizer = CountVectorizer(analyzer='word')
    vectorizer.fit([text])

    # 'bias' does not occur in the text, so it is expected under ``other``.
    input_weights = FeatureWeights(
        pos=[FW('see', 2), FW('lemon', 4), FW('bias', 8)],
        neg=[FW('tree', -6)],
        neg_remaining=10,
    )
    actual = get_weighted_spans(text, vectorizer, input_weights)

    # Each span is (feature, [(start, end), ...], weight); offsets index
    # into the lowercased document given below.
    expected_spans = [
        ('see', [(2, 5)], 2),
        ('lemon', [(17, 22)], 4),
        ('tree', [(23, 27)], -6),
    ]
    expected_other = FeatureWeights(
        pos=[FW('bias', 8), FW(hl_in_text, 0)],
        neg=[],
        neg_remaining=10,
    )
    assert actual == WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=expected_spans,
        other=expected_other,
    )
def test_unhashed_features_other():
""" Check that when there are several candidates, they do not appear in "other"
if at least one is found. If none are found, they should appear in "other"
together.
"""
doc = 'I see: a leaning lemon tree'
vec = CountVectorizer(analyzer='char', ngram_range=(3, 3))
vec.fit([doc])
w_spans = get_weighted_spans(
doc, vec,
FeatureWeights(
pos=[
FW([{'name': 'foo', 'sign': 1}, {'name': 'see', 'sign': -1}], 2),
FW([{'name': 'zoo', 'sign': 1}, {'name': 'bar', 'sign': 1}], 3),
],
neg=[
FW([{'name': 'ree', 'sign': 1}, {'name': 'tre', 'sign': 1}], -4),
],
))
assert w_spans == WeightedSpans(
analyzer='char',
document='i see: a leaning lemon tree',
weighted_spans=[
('see', [(2, 5)], 2),
('tre', [(23, 26)], -4),
('ree', [(24, 27)], -4),
],
other=FeatureWeights(
pos=[
doc = 'I see: a leaning lemon tree'
vec = CountVectorizer(analyzer='word', stop_words='english')
vec.fit([doc])
w_spans = get_weighted_spans(
doc, vec,
FeatureWeights(
pos=[FW('see', 2), FW('lemon', 5), FW('bias', 8)],
neg=[FW('tree', -6)]))
assert w_spans == WeightedSpans(
analyzer='word',
document='i see: a leaning lemon tree',
weighted_spans=[
('lemon', [(17, 22)], 5),
('tree', [(23, 27)], -6)],
other=FeatureWeights(
pos=[FW('bias', 8), FW('see', 2)],
neg=[FW(hl_in_text, -1)],
))
def test_targets_with_value():
expl = Explanation(
estimator='some estimator',
targets=[
TargetExplanation(
'y', feature_weights=FeatureWeights(
pos=[FeatureWeight('a', 13, value=1),
FeatureWeight('b', 5, value=2)],
neg=[FeatureWeight('neg1', -10, value=3),
FeatureWeight('neg2', -1, value=4)],
)),
TargetExplanation(
'y2', feature_weights=FeatureWeights(
pos=[FeatureWeight('f', 1, value=5)],
neg=[],
)),
],
)
df = format_as_dataframe(expl)
expected_df = pd.DataFrame(
{'weight': [13, 5, -1, -10, 1],
'value': [1, 2, 4, 3, 5]},
columns=['weight', 'value'],
index=pd.MultiIndex.from_tuples(
def test_targets_with_value():
    """Check ``format_as_dataframe`` on targets whose feature weights carry values."""
    target_y = TargetExplanation(
        'y', feature_weights=FeatureWeights(
            pos=[FeatureWeight('a', 13, value=1),
                 FeatureWeight('b', 5, value=2)],
            neg=[FeatureWeight('neg1', -10, value=3),
                 FeatureWeight('neg2', -1, value=4)],
        ))
    target_y2 = TargetExplanation(
        'y2', feature_weights=FeatureWeights(
            pos=[FeatureWeight('f', 1, value=5)],
            neg=[],
        ))
    expl = Explanation(
        estimator='some estimator',
        targets=[target_y, target_y2],
    )
    df = format_as_dataframe(expl)

    # Expected rows are keyed by (target, feature); within target 'y' the
    # negative features are listed as neg2 then neg1.
    expected_index = pd.MultiIndex.from_tuples(
        [('y', 'a'), ('y', 'b'), ('y', 'neg2'), ('y', 'neg1'),
         ('y2', 'f')],
        names=['target', 'feature'])
    expected_columns = {
        'weight': [13, 5, -1, -10, 1],
        'value': [1, 2, 4, 3, 5],
    }
    expected_df = pd.DataFrame(
        expected_columns,
        columns=['weight', 'value'],
        index=expected_index)

    # Printed so both frames appear in the captured output on failure.
    print(df, expected_df, sep='\n')
    assert expected_df.equals(df)
def _features(indices, feature_names, coef, x):
    """Build ``FeatureWeight`` items for the entries selected by ``indices``.

    ``feature_names`` and ``coef`` are both filtered through ``mask`` with
    ``indices``. When ``x`` is not None it is filtered the same way and each
    resulting item also carries the matching entry as ``value``.
    """
    selected_names = mask(feature_names, indices)
    selected_weights = mask(coef, indices)
    if x is None:
        # No feature values supplied: emit name/weight pairs only.
        return [FeatureWeight(feature, w)
                for feature, w in zip(selected_names, selected_weights)]
    selected_values = mask(x, indices)
    return [
        FeatureWeight(feature, w, value=v)
        for feature, w, v in zip(
            selected_names, selected_weights, selected_values)
    ]
other_items = [] # type: List[FeatureWeight]
accounted_keys = set() # type: Set[Tuple[str, int]]
all_found_features = set() # type: Set[Tuple[str, int]]
for _, found_features in named_found_features:
all_found_features.update(found_features)
for group in ['pos', 'neg']:
for idx, fw in enumerate(getattr(feature_weights, group)):
key = (group, idx)
if key not in all_found_features and key not in accounted_keys:
other_items.append(fw)
accounted_keys.add(key)
for vec_name, found_features in named_found_features:
if found_features:
other_items.append(FeatureWeight(
feature=FormattedFeatureName(
'{}Highlighted in text (sum)'.format(
'{}: '.format(vec_name) if vec_name else '')),
weight=sum(found_features.values())))
other_items.sort(key=lambda x: abs(x.weight), reverse=True)
return FeatureWeights(
pos=[fw for fw in other_items if fw.weight >= 0],
neg=[fw for fw in other_items if fw.weight < 0],
pos_remaining=feature_weights.pos_remaining,
neg_remaining=feature_weights.neg_remaining,
)
def __init__(self, *args, **kwargs):
    """Initialise the extra attributes, then delegate to ``FeatureWeight``.

    The keyword arguments ``dictionary``, ``formatted_value`` and ``score``
    are stored on the instance (``None`` when not supplied) and removed
    from ``kwargs``; all remaining positional and keyword arguments are
    forwarded unchanged to ``FeatureWeight.__init__``.
    """
    # pop() both reads and removes each extension-only keyword, so the
    # base initialiser never receives arguments it was not passed before.
    self.dictionary = kwargs.pop('dictionary', None)
    self.formatted_value = kwargs.pop('formatted_value', None)
    self.score = kwargs.pop('score', None)
    FeatureWeight.__init__(self, *args, **kwargs)