Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_guess_encoding(self):
    """Check that ISO-8859-5 encoded Cyrillic bytes are reported as such."""
    sample = u'Порошенко Петро Олексійович'.encode('iso-8859-5')
    self.assertEqual('iso-8859-5', guess_encoding(sample))
def read_file_decoded(self, entity, file_path):
    """Read ``file_path`` as bytes and return its content decoded to text.

    When the entity has no ``encoding`` value yet, one is guessed from the
    file content and stored on the entity. Every encoding the entity then
    reports is tried in order and the first successful decode is returned.

    Raises:
        ProcessingException: if none of the reported encodings can decode
            the content; the message names the last encoding attempted.

    Returns ``None`` when the entity reports no encodings at all.
    """
    with open(file_path, 'rb') as fh:
        body = fh.read()

    if not entity.has('encoding'):
        entity.set('encoding', guess_encoding(body))

    failure = None
    for encoding in entity.get('encoding'):
        try:
            # Keep ``body`` as the raw bytes so later candidates can still
            # be attempted if this one fails.
            text = body.decode(encoding)
        except UnicodeDecodeError as ude:
            # Remember the failure but keep going: a later candidate may
            # still succeed.
            failure = (encoding, ude)
            continue
        if encoding != self.DEFAULT_ENCODING:
            log.info("Decoding [%r] as: %s", entity, encoding)
        return text

    if failure is not None:
        encoding, ude = failure
        raise ProcessingException('Error decoding file as %s: %s' %
                                  (encoding, ude)) from ude
    return None
a value if possible. If there are both ASCII and Unicode
versions, then the parameter /prefer/ specifies which will be
returned.
"""
if isinstance(filename, list):
# Join with slashes to make it easier to append the type
filename = "/".join(filename)
value = windowsUnicode(self._getStream(filename + '001F'))
if value is None:
raw = self._getStream(filename + '001E')
try:
value = decode_utf7(raw)
except Exception:
encoding = guess_encoding(raw)
value = raw.decode(encoding, 'replace')
if value is not None and len(value):
return remove_unsafe_chars(value)
a value if possible. If there are both ASCII and Unicode
versions, then the parameter /prefer/ specifies which will be
returned.
"""
if isinstance(filename, list):
# Join with slashes to make it easier to append the type
filename = "/".join(filename)
value = windowsUnicode(self._getStream(filename + '001F'))
if value is None:
raw = self._getStream(filename + '001E')
try:
value = decode_utf7(raw)
except Exception:
encoding = guess_encoding(raw)
value = raw.decode(encoding, 'replace')
if value is not None and len(value):
return remove_unsafe_chars(value)
def open_csv(file_path, encoding=None, delimiter=None):
    """Open a CSV file for reading and peek at its first row.

    Args:
        file_path: path of the CSV file.
        encoding: text encoding to open the file with; when ``None`` it is
            guessed from the first ``SAMPLE_SIZE`` bytes of the file.
        delimiter: field separator; when ``None`` it is sniffed from the
            first ``SAMPLE_SIZE`` characters of the decoded text.

    Returns:
        A ``(fh, delimiter, headers)`` tuple: the open text handle rewound
        to the start of the file, the delimiter in effect, and the first
        row as a list (empty if the file has no rows). The caller owns the
        handle and must close it.

    Raises:
        Whatever decoding or ``csv.Sniffer`` raises (e.g.
        ``UnicodeDecodeError``, ``csv.Error``); the handle is closed before
        the error propagates.
    """
    if encoding is None:
        with io.open(file_path, 'rb') as raw:
            sample = raw.read(SAMPLE_SIZE)
        encoding = guess_encoding(sample)

    fh = io.open(file_path, 'r', encoding=encoding)
    try:
        if delimiter is None:
            sample = fh.read(SAMPLE_SIZE)
            delimiter = csv.Sniffer().sniff(sample).delimiter
            fh.seek(0)
        # Only the first row is needed; default to no headers for an
        # empty file.
        headers = next(csv.reader(fh, delimiter=delimiter), [])
        fh.seek(0)
    except Exception:
        # The handle is only handed to the caller on success, so release
        # it here rather than leaking it when sniffing or decoding fails.
        fh.close()
        raise
    return fh, delimiter, headers
def decode_string(self, text, encoding=DEFAULT_ENCODING):
    """Best-effort conversion of ``text`` to a string.

    Non-bytes input is handed to ``stringify``. Bytes are decoded strictly
    with the normalized ``encoding`` first, then strictly with whatever
    ``guess_encoding`` detects; if both attempts fail, the bytes are decoded
    with the normalized ``encoding`` using the ``'replace'`` error handler.
    """
    if isinstance(text, bytes):
        encoding = normalize_encoding(encoding)
        # Candidates are resolved lazily so detection only runs after the
        # requested encoding has failed.
        candidates = (lambda: encoding, lambda: guess_encoding(text))
        for resolve in candidates:
            try:
                return text.decode(resolve(), 'strict')
            except Exception:
                continue
        return text.decode(encoding, 'replace')
    return stringify(text)