Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def compare_output(baseline, current):
    """Score how similar ``current`` is to ``baseline``.

    The scorer is chosen by the module-level ``DEFAULT_ALGORITHM`` name,
    which must be one of ``ratio``, ``partial_ratio``, ``token_sort_ratio``,
    ``partial_token_sort_ratio`` or ``token_set_ratio``.  The matching
    ``fuzz`` function is called with ``(baseline, current)`` and its result
    is returned unchanged.

    For any other name a message is printed and the process is terminated
    through ``sys.exit(-1)``.
    """
    # Algorithm name -> fuzz scorer; replaces a chain of equality tests.
    scorers = {
        'ratio': fuzz.ratio,
        'partial_ratio': fuzz.partial_ratio,
        'token_sort_ratio': fuzz.token_sort_ratio,
        'partial_token_sort_ratio': fuzz.partial_token_sort_ratio,
        'token_set_ratio': fuzz.token_set_ratio,
    }

    scorer = scorers.get(DEFAULT_ALGORITHM)
    if scorer is None:
        print("Unknown similarity measure " + DEFAULT_ALGORITHM + ". Aborting")
        sys.exit(-1)

    return scorer(baseline, current)
def testPartialTokenSortRatio(self):
    """Check ``fuzz.partial_token_sort_ratio`` on the fixture string pairs.

    The fixture attributes (``self.s1``, ``self.s1a``, ...) are expected to
    be provided by the enclosing test case.
    """
    score = fuzz.partial_token_sort_ratio

    # Default processing.
    self.assertEqual(score(self.s1, self.s1a), 100)
    self.assertEqual(score(self.s4, self.s5), 100)

    # Explicit full_process settings.
    self.assertEqual(score(self.s8, self.s8a, full_process=False), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=True), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=False), 100)

    # The only pair here whose expected score is not 100.
    self.assertEqual(score(self.s10, self.s10a, full_process=False), 50)
"""
# Build and return the list of (scorer, processor) pairs.
# NOTE(review): the enclosing function header is not visible in this snippet.

# Scorers that get combined with every processor below.
scorers = [fuzz.ratio,
fuzz.partial_ratio]
# Processors: identity, and utils.full_process with force_ascii off and on.
processors = [lambda x: x,
partial(utils.full_process, force_ascii=False),
partial(utils.full_process, force_ascii=True)]
# Every (scorer, processor) combination of the two lists above
# (presumably itertools.product -- confirm against the file's imports).
splist = list(product(scorers, processors))
# Further scorers, each paired with a single fixed full_process setting:
# force_ascii=False for UWRatio/UQRatio, force_ascii=True for the rest.
splist.extend(
[(fuzz.WRatio, partial(utils.full_process, force_ascii=True)),
(fuzz.QRatio, partial(utils.full_process, force_ascii=True)),
(fuzz.UWRatio, partial(utils.full_process, force_ascii=False)),
(fuzz.UQRatio, partial(utils.full_process, force_ascii=False)),
(fuzz.token_set_ratio, partial(utils.full_process, force_ascii=True)),
(fuzz.token_sort_ratio, partial(utils.full_process, force_ascii=True)),
(fuzz.partial_token_set_ratio, partial(utils.full_process, force_ascii=True)),
(fuzz.partial_token_sort_ratio, partial(utils.full_process, force_ascii=True))]
)
return splist
def testPartialTokenSortRatio(self):
    """Check ``fuzz.partial_token_sort_ratio`` on the fixture string pairs.

    The fixture attributes (``self.s1``, ``self.s1a``, ...) are expected to
    be provided by the enclosing test case.
    """
    score = fuzz.partial_token_sort_ratio

    # Default processing.
    self.assertEqual(score(self.s1, self.s1a), 100)
    self.assertEqual(score(self.s4, self.s5), 100)

    # Explicit full_process settings.
    self.assertEqual(score(self.s8, self.s8a, full_process=False), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=True), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=False), 100)

    # The only pair here whose expected score is not 100.
    self.assertEqual(score(self.s10, self.s10a, full_process=False), 50)
def testPartialTokenSortRatio(self):
    """Check ``fuzz.partial_token_sort_ratio`` on the fixture string pairs.

    The fixture attributes (``self.s1``, ``self.s1a``, ...) are expected to
    be provided by the enclosing test case.
    """
    score = fuzz.partial_token_sort_ratio

    # Default processing.
    self.assertEqual(score(self.s1, self.s1a), 100)
    self.assertEqual(score(self.s4, self.s5), 100)

    # Explicit full_process settings.
    self.assertEqual(score(self.s8, self.s8a, full_process=False), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=True), 100)
    self.assertEqual(score(self.s9, self.s9a, full_process=False), 100)

    # The only pair here whose expected score is not 100.
    self.assertEqual(score(self.s10, self.s10a, full_process=False), 50)
# Append one comparison row: the publisher and vendor strings in two forms
# each, plus five similarity scores computed between t_cpeVen and t_arPub0.
# NOTE(review): the `_orig` / `_cln` naming suggests raw vs. cleaned values,
# and `fz` looks like an alias for fuzzywuzzy's `fuzz` -- confirm both.
lst_dict.append({
'publisher0': t_arPub0_orig,
'pub0_cln': t_arPub0,
'vendor_X': t_cpeVen_orig,
'ven_cln': t_cpeVen,
'fz_ratio': fz.ratio(
t_cpeVen,
t_arPub0),
'fz_ptl_ratio': fz.partial_ratio(
t_cpeVen,
t_arPub0),
# The two token-based scorers are called with force_ascii=False.
'fz_tok_set_ratio': fz.token_set_ratio(
t_cpeVen,
t_arPub0,
force_ascii=False),
'fz_ptl_tok_sort_ratio': fz.partial_token_sort_ratio(
t_cpeVen,
t_arPub0,
force_ascii=False),
'fz_uwratio': fz.UWRatio(
t_cpeVen,
t_arPub0)
})
# Count the row just appended and emit a debug log line every 1000 rows.
mycount = mycount + 1
if mycount % 1000 == 0:
self.logger.debug(
'# entries produced: {0}\n'.format(
mycount
)
)
# # debug code to shorten loop for testing
# Collect parts whose name fuzzily matches `match`, then sort them by score.
# NOTE(review): the enclosing function header (and whatever follows the final
# sort) is not visible in this snippet; `match`, `threshold`, `compare_length`
# and `reverse` are presumably its parameters -- confirm.

# Normalise the search term: stringify, trim surrounding whitespace, lower-case.
match = str(match).strip().lower()
# An empty search term yields no matches.
if len(match) == 0:
return []
# Every Part is a candidate (presumably a Django queryset -- confirm).
parts = Part.objects.all()
matches = []
for part in parts:
# Normalise the part name the same way as the search term.
compare = str(part.name).strip().lower()
# Skip parts whose name is empty after normalisation.
if len(compare) == 0:
continue
ratio = fuzz.partial_token_sort_ratio(compare, match)
if compare_length:
# Also employ primitive length comparison
# TODO - Improve this somewhat...
# Scale the score by shorter-length / longer-length. Both strings are
# non-empty at this point, so l_max is never zero.
# NOTE(review): relies on true division; under Python 2 integer
# division this factor would be 0 unless the lengths are equal -- confirm
# the target interpreter.
l_min = min(len(match), len(compare))
l_max = max(len(match), len(compare))
ratio *= (l_min / l_max)
# Keep only candidates that reach the threshold.
if ratio >= threshold:
matches.append({
'part': part,
'ratio': ratio
})
# Order the kept matches by their ratio; `reverse` selects the direction.
matches = sorted(matches, key=lambda item: item['ratio'], reverse=reverse)
def fuzzy(s1, s2):
    """Return eight ``fuzz`` scores for ``s1`` against ``s2``, each divided by 100.

    The list is ordered: ratio, partial_ratio, token_sort_ratio,
    partial_token_sort_ratio, token_set_ratio, partial_token_set_ratio,
    QRatio, WRatio.
    """
    scorers = (
        fuzz.ratio,
        fuzz.partial_ratio,
        fuzz.token_sort_ratio,
        fuzz.partial_token_sort_ratio,
        fuzz.token_set_ratio,
        fuzz.partial_token_set_ratio,
        fuzz.QRatio,
        fuzz.WRatio,
    )
    return [scorer(s1, s2) / 100 for scorer in scorers]