How to use the normality.cleaning.remove_unsafe_chars function in normality

To help you get started, we’ve selected a few normality examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alephdata / followthemoney / followthemoney / util.py View on Github external
def sanitize_text(text, encoding=DEFAULT_ENCODING):
    """Turn an arbitrary value into cleaned-up text, or ``None``.

    The value is passed through ``stringify`` (using ``encoding`` as the
    fallback encoding), NFC-composed, stripped of unsafe characters and
    finally round-tripped through ``DEFAULT_ENCODING``.

    Returns ``None`` when ``stringify`` yields ``None`` or when the NFC
    composition step raises; the latter case is logged as a warning.
    """
    cleaned = stringify(text, encoding_default=encoding)
    if cleaned is None:
        return None

    try:
        cleaned = compose_nfc(cleaned)
    except Exception as exc:  # SystemError is an Exception subclass too
        log.warning("Cannot NFC text: %s", exc)
        return None

    cleaned = remove_unsafe_chars(cleaned)
    # Round-trip through the default encoding; anything that cannot be
    # encoded or decoded is substituted rather than raising.
    raw = cleaned.encode(DEFAULT_ENCODING, "replace")
    return raw.decode(DEFAULT_ENCODING, "replace")
github pudo / normality / normality / stringify.py View on Github external
object into a unicode string. It is guaranteed to either return unicode or
    None, if all conversions failed (or the value is indeed empty).
    """
    if value is None:
        return None

    if not isinstance(value, six.text_type):
        if isinstance(value, (date, datetime)):
            return value.isoformat()
        elif isinstance(value, (float, Decimal)):
            return Decimal(value).to_eng_string()
        elif isinstance(value, six.binary_type):
            if encoding is None:
                encoding = guess_encoding(value, default=encoding_default)
            value = value.decode(encoding, 'replace')
            value = remove_unsafe_chars(value)
        else:
            value = six.text_type(value)

    # XXX: is this really a good idea?
    value = value.strip()
    if not len(value):
        return None
    return value
github occrp-attic / ingestors / ingestors / email / outlookmsg_lib.py View on Github external
if isinstance(filename, list):
            # Join with slashes to make it easier to append the type
            filename = "/".join(filename)

        value = windowsUnicode(self._getStream(filename + '001F'))
        if value is None:
            raw = self._getStream(filename + '001E')
            try:
                value = decode_utf7(raw)
            except Exception:
                encoding = guess_encoding(raw)
                value = raw.decode(encoding, 'replace')

        if value is not None and len(value):
            return remove_unsafe_chars(value)
github pudo / dbcopy / dbcopy / db.py View on Github external
def _convert_value(self, value, table, column):
        """Clean a single cell value based on the type of its column.

        For date/datetime columns the all-zero placeholder strings are
        replaced with ``None``. For string/unicode columns, ``str`` values
        are passed through ``remove_unsafe_chars``. Any other value is
        returned unchanged. ``table`` is accepted but not used here.
        """
        column_type = column.type
        zero_dates = ('0000-00-00 00:00:00', '0000-00-00')

        is_temporal = isinstance(column_type, (types.DateTime, types.Date))
        if is_temporal and value in zero_dates:
            value = None

        is_textual = isinstance(column_type, (types.String, types.Unicode))
        if is_textual and isinstance(value, str):
            value = remove_unsafe_chars(value)

        return value
github alephdata / aleph / services / ingest-file / ingestors / email / outlookmsg_lib.py View on Github external
if isinstance(filename, list):
            # Join with slashes to make it easier to append the type
            filename = "/".join(filename)

        value = windowsUnicode(self._getStream(filename + '001F'))
        if value is None:
            raw = self._getStream(filename + '001E')
            try:
                value = decode_utf7(raw)
            except Exception:
                encoding = guess_encoding(raw)
                value = raw.decode(encoding, 'replace')

        if value is not None and len(value):
            return remove_unsafe_chars(value)
github occrp-attic / ingestors / ingestors / util.py View on Github external
def safe_string(data, encoding_default='utf-8', encoding=None):
    """Stringify a value and round-trip it through ``encoding_default``.

    The value is converted with ``stringify`` and passed through
    ``remove_unsafe_chars``. If the result is ``None``, ``None`` is
    returned. Otherwise the text is encoded with ``encoding_default``
    (substituting characters that cannot be encoded) and decoded again
    strictly.
    """
    text = remove_unsafe_chars(
        stringify(data, encoding_default=encoding_default, encoding=encoding)
    )
    if text is None:
        return None

    encoded = text.encode(encoding_default, 'replace')
    return encoded.decode(encoding_default, 'strict')