Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_empty(self):
    """Null and blank inputs must come back as None from every helper."""
    for helper in (slugify, ascii_text, latinize_text, normalize):
        self.assertEqual(None, helper(None))
    # Empty and whitespace-only strings normalize away entirely.
    self.assertEqual(None, normalize(''))
    self.assertEqual(None, normalize(' '))
def compare_names(left, right):
    """Return the best pairwise name-similarity score between two entities.

    Every name of ``left`` is compared against every name of ``right``;
    the highest Jaro similarity — dampened for short strings — wins.

    :param left: object with a ``.names`` iterable of name strings.
    :param right: object with a ``.names`` iterable of name strings.
    :return: highest similarity score found (0 when nothing compares).
    """
    best = 0
    # normalize() returns None for empty/whitespace input; drop those so
    # jaro() never receives a None operand. (Also avoid re-binding the
    # `left`/`right` parameters inside the loop, as the original did.)
    left_names = [n for n in (normalize(n, latinize=True) for n in left.names)
                  if n is not None]
    right_names = [n for n in (normalize(n, latinize=True) for n in right.names)
                   if n is not None]
    for left_name, right_name in itertools.product(left_names, right_names):
        similarity = jaro(left_name, right_name)
        # dampen(2, 20, ...) presumably discounts matches on short names —
        # TODO confirm dampen()'s exact semantics against its definition.
        score = similarity * dampen(2, 20, shortest(left_name, right_name))
        best = max(best, score)
    return best
def name_tokens(name):
    """Split an ASCII-normalized name into tokens longer than one character."""
    normalized = normalize(name, ascii=True)
    # normalize() yields None for empty input — no tokens in that case.
    if normalized is None:
        return []
    parts = normalized.split(' ')
    return [part for part in parts if len(part) > 1]
def prepare_geonames():
    """Build a shelve database mapping normalized place name -> country.

    Reads the raw GeoNames dump (tab-separated) at GEONAMES_RAW_PATH and,
    for every name variant of a place, records the place's country code;
    a second pass collapses each name to its most frequent country.
    """
    with io.open(GEONAMES_RAW_PATH, 'r', encoding='utf-8') as fh:
        with shelve.open(GEONAMES_DB_PATH) as db:
            for row in csv.reader(fh, delimiter='\t'):
                # Column 8 appears to be the country code in the GeoNames
                # schema — TODO confirm against the dump version in use.
                country = normalize(row[8])
                if country is None:
                    continue
                # Columns 1-3: name, ASCII name, comma-separated alternates.
                names = set(row[3].split(','))
                names.add(row[1])
                names.add(row[2])
                for name in names:
                    name = normalize(name)
                    if name is None:
                        continue
                    countries = db.get(name)
                    if countries:
                        countries.append(country)
                        # shelve does not persist in-place mutation of
                        # stored values; the list must be re-assigned.
                        db[name] = countries
                    else:
                        db[name] = [country]
            # Second pass: replace each name's country list with its most
            # frequent country code. NOTE(review): the source indentation
            # was flattened; this loop most plausibly runs once, after all
            # rows are ingested — confirm against the original file.
            for name in db:
                countries = db[name]
                db[name] = max(set(countries), key=countries.count)
def name_tokens(name):
    """Tokenise a latinized, normalized name into its non-empty tokens.

    :param name: raw name string (may be empty or None-normalizing).
    :return: list of tokens; empty list when the name normalizes away.
    """
    normalized = normality.normalize(name, latinize=True)
    # normalize() returns None for empty/whitespace input; return no
    # tokens instead of crashing on None.split() — consistent with the
    # other name_tokens() helper in this codebase.
    if normalized is None:
        return []
    return [n for n in normalized.split(' ') if len(n)]
def _normalize_name(country):
    """Clean up a country name (latinized) before comparison."""
    cleaned = normalize(country, latinize=True)
    return cleaned
def filter_value(self, q, filter_stmt):
    """Apply this node's comparison operator to the query via filter_stmt."""
    op = self.node.op
    column = filter_stmt._value
    if op == OP_EQ:
        return q.filter(column == self.node.value)
    if op == OP_NOT:
        return q.filter(column != self.node.value)
    if op == OP_IN:
        return q.filter(column.in_(self.node.data))
    if op == OP_NIN:
        return q.filter(~column.in_(self.node.data))
    if op == OP_LIKE:
        # Substring match against the pre-normalized column.
        pattern = '%%%s%%' % normalize(self.node.value)
        return q.filter(filter_stmt.normalized.like(pattern))
    # Unknown operator: leave the query untouched.
    return q
def match_form(self, text):
    """Turn a string into a form appropriate for name matching.

    The output is not meant to be readable: it is a lower-cased,
    latinized representation for comparison and machine analysis only.
    """
    normalized = normalize(text, lowercase=True, latinize=True)
    if normalized is None:
        return None
    # Single-token names match too aggressively; require at least two
    # words. (A deliberate heuristic carried over from the original.)
    return normalized if ' ' in normalized else None
def match_prefix(self, prefix):
    """Check whether this entity's name or label starts with ``prefix``.

    Abstract entities never match. Returns False — rather than raising
    TypeError/AttributeError as the original did — when the prefix or
    the entity's name/label normalize to None.
    """
    prefix = normalize(prefix)
    # normalize() returns None for empty input; nothing can match then.
    if prefix is None or self.abstract:
        return False
    name = normalize(self.name)
    if name is not None and name.startswith(prefix):
        return True
    label = normalize(self.label)
    if label is not None and label.startswith(prefix):
        return True
    return False
def make_csv_file_name(meta, table, out_folder):
    """Build the output CSV path '<bank> - <abbr> - <table>.csv'.

    Every component is guarded against a None result from normalize()
    (the original only guarded the bank name, so the literal string
    'None' could end up in the generated file name).

    :param meta: mapping with a 'BankName' entry.
    :param table: mapping with 'abbr' and 'name' entries.
    :param out_folder: directory the file should live in.
    :return: full path for the CSV file.
    """
    bank_name = normalize(meta['BankName'], lowercase=False)
    if bank_name is None:
        bank_name = 'Untitled Database'
    table_abbr = normalize(table['abbr'], lowercase=False)
    if table_abbr is None:
        table_abbr = 'Untitled'
    table_name = normalize(table['name'], lowercase=False)
    if table_name is None:
        table_name = 'Table'
    file_name = '%s - %s - %s.csv' % (bank_name, table_abbr, table_name)
    return os.path.join(out_folder, file_name)