Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def ratio(s1, s2):
s1, s2 = utils.make_type_consistent(s1, s2)
m = SequenceMatcher(None, s1, s2)
return utils.intr(100 * m.ratio())
# should we look at partials?
try_partial = True
partial_scale = .9
base = fuzz.ratio(p1, p2)
len_ratio = float(max(len(p1), len(p2))-1) / min(len(p1), len(p2))
# if strings are similar length, don't use partials
if len_ratio < 1.3:
try_partial = False
if try_partial:
partial = fuzz.partial_ratio(p1, p2) * partial_scale
return utils.intr(max(base, partial))
else:
return utils.intr(base)
if len_ratio > 8:
partial_scale = .6
if try_partial:
partial = partial_ratio(p1, p2) * partial_scale
ptsor = partial_token_sort_ratio(p1, p2, full_process=False) \
* unbase_scale * partial_scale
ptser = partial_token_set_ratio(p1, p2, full_process=False) \
* unbase_scale * partial_scale
return utils.intr(max(base, partial, ptsor, ptser))
else:
tsor = token_sort_ratio(p1, p2, full_process=False) * unbase_scale
tser = token_set_ratio(p1, p2, full_process=False) * unbase_scale
return utils.intr(max(base, tsor, tser))
# block = (1,3,3)
# best score === ratio("abcd", "Xbcd")
scores = []
for block in blocks:
long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0
long_end = long_start + len(shorter)
long_substr = longer[long_start:long_end]
m2 = SequenceMatcher(None, shorter, long_substr)
r = m2.ratio()
if r > .995:
return 100
else:
scores.append(r)
return utils.intr(100 * max(scores))
# if strings are similar length, don't use partials
if len_ratio < 1.5:
try_partial = False
# if one string is much much shorter than the other
if len_ratio > 8:
partial_scale = .6
if try_partial:
partial = partial_ratio(p1, p2) * partial_scale
ptsor = partial_token_sort_ratio(p1, p2, full_process=False) \
* unbase_scale * partial_scale
ptser = partial_token_set_ratio(p1, p2, full_process=False) \
* unbase_scale * partial_scale
return utils.intr(max(base, partial, ptsor, ptser))
else:
tsor = token_sort_ratio(p1, p2, full_process=False) * unbase_scale
tser = token_set_ratio(p1, p2, full_process=False) * unbase_scale
return utils.intr(max(base, tsor, tser))