Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_asciionly(self):
    """Smoke-test ``utils.asciionly`` against every mixed sample string."""
    for sample in self.mixed_strings:
        # asciionly only runs on strings, so pass each sample through
        # asciidammit first and feed that result in.
        utils.asciionly(utils.asciidammit(sample))
def testCaseInsensitive(self):
    """
    The raw fixtures must not score 100, but must score exactly 100 once
    both have been passed through ``utils.full_process``.
    """
    raw_score = fuzz.ratio(self.s1, self.s2)
    self.assertNotEqual(raw_score, 100)

    processed_first = utils.full_process(self.s1)
    processed_second = utils.full_process(self.s2)
    self.assertEqual(fuzz.ratio(processed_first, processed_second), 100)
def scorers_processors():
    """
    Build the (scorer, processor) combinations used for testing.

    The two basic scorers are paired with every basic processor; each of
    the remaining scorers is paired with a single ``utils.full_process``
    partial using the ``force_ascii`` setting listed for it below.

    :return: [(scorer, processor), ...]
    """
    def full_process_with(force_ascii):
        # Build a new partial on every call so each explicitly listed pair
        # gets its own processor object.
        return partial(utils.full_process, force_ascii=force_ascii)

    basic_scorers = (fuzz.ratio, fuzz.partial_ratio)
    basic_processors = (
        lambda x: x,
        full_process_with(False),
        full_process_with(True),
    )

    # Cartesian product, scorer-major: same ordering as itertools.product.
    pairs = [
        (scorer, processor)
        for scorer in basic_scorers
        for processor in basic_processors
    ]

    fixed_pairings = (
        (fuzz.WRatio, True),
        (fuzz.QRatio, True),
        (fuzz.UWRatio, False),
        (fuzz.UQRatio, False),
        (fuzz.token_set_ratio, True),
        (fuzz.token_sort_ratio, True),
        (fuzz.partial_token_set_ratio, True),
        (fuzz.partial_token_sort_ratio, True),
    )
    for scorer, force_ascii in fixed_pairings:
        pairs.append((scorer, full_process_with(force_ascii)))

    return pairs
def testCheckForNone(self):
    """
    A function wrapped by ``utils.check_for_none`` must return 0 when either
    argument is None and a non-zero value when both are real strings.
    """
    guarded = utils.check_for_none(self.testFunc)

    none_cases = (
        (None, None),
        ('Some', None),
        (None, 'Some'),
    )
    for first, second in none_cases:
        self.assertEqual(guarded(first, second), 0)

    # With two real strings the wrapped function's result must not be 0.
    self.assertNotEqual(guarded('Some', 'Some'), 0)
def _process_and_sort(s, force_ascii, full_process=True):
"""Return a cleaned string with token sorted."""
# pull tokens
ts = utils.full_process(s, force_ascii=force_ascii) if full_process else s
tokens = ts.split()
# sort tokens and join
sorted_string = u" ".join(sorted(tokens))
return sorted_string.strip()
(on top of any partial scalars)
#. Take the highest value from these results
round it and return it as an integer.
:param s1:
:param s2:
:param force_ascii: Allow only ascii characters
:type force_ascii: bool
:full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
:return:
"""
if full_process:
p1 = utils.full_process(s1, force_ascii=force_ascii)
p2 = utils.full_process(s2, force_ascii=force_ascii)
else:
p1 = s1
p2 = s2
if not utils.validate_string(p1):
return 0
if not utils.validate_string(p2):
return 0
# should we look at partials?
try_partial = True
unbase_scale = .95
partial_scale = .90
base = ratio(p1, p2)
len_ratio = float(max(len(p1), len(p2))) / min(len(p1), len(p2))
def fp_ratio(s1, s2, force_ascii=True, full_process=True):
    """
    Score the similarity of two strings, starting from ``fuzz.ratio``.

    NOTE(review): the visible part of this function stops after computing
    the length ratio; how ``try_partial``, ``partial_scale``, ``base`` and
    ``len_ratio`` are combined into the final score is not shown here.

    :param s1: first string
    :param s2: second string
    :param force_ascii: forwarded to ``utils.full_process``
    :param full_process: run ``utils.full_process`` on both inputs first
        (Default: True); when False the inputs are used as given
    :return: 0 when either (processed) string fails ``utils.validate_string``
    """
    left, right = s1, s2
    if full_process:
        left = utils.full_process(s1, force_ascii=force_ascii)
        right = utils.full_process(s2, force_ascii=force_ascii)

    # Both strings must validate; the second is only checked if the first passes.
    if not (utils.validate_string(left) and utils.validate_string(right)):
        return 0

    # should we look at partials?
    try_partial = True
    partial_scale = .9

    base = fuzz.ratio(left, right)
    longest = max(len(left), len(right))
    shortest = min(len(left), len(right))
    len_ratio = float(longest - 1) / shortest
@utils.check_for_equivalence
@utils.check_empty_string
def ratio(s1, s2):
    """
    Return the ``SequenceMatcher`` similarity of two strings scaled to 0-100.

    :param s1: first string
    :param s2: second string
    :return: ``utils.intr`` applied to 100 times the matcher's ratio
    """
    left, right = utils.make_type_consistent(s1, s2)
    matcher = SequenceMatcher(None, left, right)
    similarity = matcher.ratio()
    return utils.intr(100 * similarity)
processor = no_process
# Run the processor on the input query.
processed_query = processor(query)
if len(processed_query) == 0:
logging.warning(u"Applied processor reduces input query to empty string, "
"all comparisons will have score 0. "
"[Query: \'{0}\']".format(query))
# Don't run full_process twice
if scorer in [fuzz.WRatio, fuzz.QRatio,
fuzz.token_set_ratio, fuzz.token_sort_ratio,
fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio,
fuzz.UWRatio, fuzz.UQRatio] \
and processor == utils.full_process:
processor = no_process
# Only process the query once instead of for every choice
if scorer in [fuzz.UWRatio, fuzz.UQRatio]:
pre_processor = partial(utils.full_process, force_ascii=False)
scorer = partial(scorer, full_process=False)
elif scorer in [fuzz.WRatio, fuzz.QRatio,
fuzz.token_set_ratio, fuzz.token_sort_ratio,
fuzz.partial_token_set_ratio, fuzz.partial_token_sort_ratio]:
pre_processor = partial(utils.full_process, force_ascii=True)
scorer = partial(scorer, full_process=False)
else:
pre_processor = no_process
processed_query = pre_processor(processed_query)
try: