Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
This will take a string and apply a set of transformations to it so
that it can be processed more easily afterwards. Arguments:
* ``lowercase``: not very mysterious.
* ``collapse``: replace multiple whitespace-like characters with a
single whitespace. This is especially useful with category replacement
which can lead to a lot of whitespace.
* ``decompose``: apply a unicode normalization (NFKD) to separate
simple characters and their diacritics.
* ``replace_categories``: This will perform a replacement of whole
classes of unicode characters (e.g. symbols, marks, numbers) with a
given character. It is used to replace any non-text elements of the
input string.
"""
text = stringify(text, encoding_default=encoding_default,
encoding=encoding)
if text is None:
return
if lowercase:
# Yeah I made a Python package for this.
text = text.lower()
if ascii:
# A stricter form of transliteration that leaves only ASCII
# characters.
text = ascii_text(text)
elif latinize:
# Perform unicode-based transliteration, e.g. of Cyrillic
# or CJK scripts into Latin.
text = latinize_text(text)
def _safe_name(file_name, sep):
    """Turn a file name into an ASCII string with ``sep`` between its parts.

    The name is stringified, passed through ``ascii_text``, run through
    ``category_replace`` with ``UNICODE_CATEGORIES`` and then through
    ``collapse_spaces``. Every ``WS`` left in the result is swapped for
    ``sep``.

    Returns ``None`` when the input cannot be stringified or when nothing
    remains after cleaning.
    """
    name = stringify(file_name)
    if name is None:
        return None
    # Order matters: transliterate first, then replace character
    # categories, and only then squeeze the resulting whitespace runs.
    cleaned = collapse_spaces(
        category_replace(ascii_text(name), UNICODE_CATEGORIES)
    )
    if not cleaned:
        # Covers both ``None`` and the empty string.
        return None
    return cleaned.replace(WS, sep)
def slugify(text, sep='-'):
    """Generate a simple slug from ``text`` using ``sep`` as the joiner.

    Any ``sep`` already present in the input is first turned into ``WS`` so
    that it goes through ``normalize`` (called with ``ascii=True``) like
    ordinary whitespace; afterwards every ``WS`` in the normalized text is
    replaced with ``sep``.

    Returns ``None`` if the input cannot be stringified or if ``normalize``
    yields ``None``.
    """
    raw = stringify(text)
    if raw is not None:
        normalized = normalize(raw.replace(sep, WS), ascii=True)
        if normalized is not None:
            return normalized.replace(WS, sep)
    return None