Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_parse_classifier(self):
    """
    Parse every loaded test case with the classifier switched on.

    The cases from ``load_tests(False)`` and ``load_tests(True)`` are
    combined and visited from the shortest ``'req'`` entry to the longest.
    For each one, the result of ``p.parse`` must equal the case's ``'res'``
    entry; on failure the message shows the ``__dict__`` of both sides.
    """
    def dump(items):
        # Attribute dictionaries make the failure message readable.
        return [item.__dict__ for item in items]

    cases = load_tests(False) + load_tests(True)
    # forcedly activate classifier
    clf.USE_CLF = True
    for case in sorted(cases, key=lambda entry: len(entry['req'])):
        actual = p.parse(case['req'])
        expected = case['res']
        message = "{} \n {}".format(dump(actual), dump(expected))
        self.assertEqual(actual, expected, message)
def test_training(self):
    """
    Run ``clf.train_classifier`` twice: first with ``False``, then ``True``.

    The test passes as long as neither call raises.
    """
    # TODO - update test to not overwrite existing clf.pickle and wiki.json files.
    for flag in (False, True):
        clf.train_classifier(flag)
def disambiguate_unit(unit_surface, text, lang="en_US"):
"""
Resolve ambiguity between units with same names, symbols or abbreviations.

With ``clf.USE_CLF`` set, the decision is delegated to
``clf.disambiguate_unit`` and the ``name`` of its result is used.
Otherwise the surface is looked up in the ``symbols``, ``surfaces``,
``surfaces_lower`` and ``symbols_lower`` tables of ``load.units(lang)``,
in that order, keeping the first truthy hit; several candidates are
narrowed down with ``no_clf.disambiguate_no_classifier``.

:param unit_surface: surface form to resolve
:param text: passed through to the disambiguation helpers
:param lang: language code handed to the loader and the helpers
:returns (str) unit name of the resolved unit
"""
# Classifier path: keep only the name of whatever the classifier picks.
if clf.USE_CLF:
base = clf.disambiguate_unit(unit_surface, text, lang).name
else:
# Lookup chain: exact symbol, exact surface, then the lower-cased tables.
# "or" moves on to the next table whenever a lookup yields a falsy value.
# NOTE(review): plain indexing is used, so these tables presumably supply
# a default for unknown keys instead of raising KeyError - confirm.
base = (
load.units(lang).symbols[unit_surface]
or load.units(lang).surfaces[unit_surface]
or load.units(lang).surfaces_lower[unit_surface.lower()]
or load.units(lang).symbols_lower[unit_surface.lower()]
)
# More than one candidate: let the non-classifier heuristic choose.
if len(base) > 1:
base = no_clf.disambiguate_no_classifier(base, text, lang)
# Exactly one candidate: unwrap it from its container.
elif len(base) == 1:
base = next(iter(base))
# Reduce the chosen unit object to its name.
# NOTE(review): no return statement is visible after this point although
# the docstring promises one; the excerpt looks truncated - confirm
# against the full source.
if base:
base = base.name
def glove_via_magnitude(topn=200,
min_similarity=None,
filename='glove.6B.100d.magnitude'):
# Build a training set by querying a Magnitude embedding file for the
# nearest neighbours of each ambiguous unit's surface forms.
# topn and min_similarity are forwarded to Magnitude.most_similar;
# filename is resolved relative to TOPDIR.
# Imported inside the function, so pymagnitude is only needed when it is called.
from pymagnitude import Magnitude
v = Magnitude(os.path.join(TOPDIR, filename))
training_set = list()
units = set()
# Gather every unit found in element [1] of each entry returned by
# classifier.ambiguous_units(); the set removes duplicates.
for unit_list in classifier.ambiguous_units():
for unit in unit_list[1]:
units.add(unit)
for unit in units:
print('Processing {}...'.format(unit.name))
name = unit.name
# NOTE(review): if unit.name is a str, set(unit.name) yields its individual
# characters rather than {unit.name} - confirm this is intended.
surfaces = set(unit.name)
# Only classes.Unit instances contribute their surfaces and symbols.
if isinstance(unit, classes.Unit):
surfaces.update(unit.surfaces)
surfaces.update(unit.symbols)
# One similarity query per surface form; each appends a record to training_set.
for surface in surfaces:
neighbours = v.most_similar(
v.query(surface), topn=topn, min_similarity=min_similarity)
# NOTE(review): the excerpt ends inside this dict literal; the remaining
# keys are not visible here.
training_set.append({
'unit':
name,
def disambiguate_entity(key, text, lang="en_US"):
    """
    Resolve ambiguity between entities with same dimensionality.

    When ``clf.USE_CLF`` is set, the result of ``clf.disambiguate_entity``
    is returned. Otherwise the candidates stored under ``key`` in
    ``load.entities().derived`` are inspected: a single candidate is
    returned as is, several candidates are narrowed down with
    ``no_clf.disambiguate_no_classifier`` and the outcome is looked up in
    ``load.entities().names``.

    :param key: dimensionality key used to find the candidate entities
    :param text: passed through to the disambiguation helpers
    :param lang: language code handed to the disambiguation helpers
    :returns: the resolved entity, or ``None`` when there is no candidate
        or a lookup raises ``KeyError`` / ``StopIteration``
    """
    try:
        if clf.USE_CLF:
            return clf.disambiguate_entity(key, text, lang)

        candidates = load.entities().derived[key]
        count = len(candidates)
        if count == 1:
            return next(iter(candidates))
        if count > 1:
            chosen = no_clf.disambiguate_no_classifier(candidates, text, lang)
            return load.entities().names[chosen]
        # No candidates at all.
        return None
    except (KeyError, StopIteration):
        # Unknown key or failed lookup: report "no entity" instead of raising.
        return None
def glove_via_magnitude(
topn=500, min_similarity=None, filename="glove.6B.100d.magnitude", lang="en_US"
):
from pymagnitude import Magnitude
v = Magnitude(os.path.join(TOPDIR, filename))
training_set = list()
units = set()
for unit_list in classifier.ambiguous_units():
for unit in unit_list[1]:
units.add(unit)
for unit in units:
print("Processing {}...".format(unit.name))
name = unit.name
surfaces = set(unit.name)
if isinstance(unit, classes.Unit):
surfaces.update(unit.surfaces)
surfaces.update(unit.symbols)
for surface in surfaces:
neighbours = v.most_similar(
v.query(surface), topn=topn, min_similarity=min_similarity
)
training_set.append(
{